VRIS_vip / LAVT-RIS /scripts /baseline_test_lr_angle2.sh

Upload folder using huggingface_hub

8d82201 verified 11 months ago

2.95 kB

	#!/bin/bash
	#SBATCH --job-name=angle2
	#SBATCH --partition=a6000
	#SBATCH --gres=gpu:4
	#SBATCH --time=13-11:30:00 # d-hh:mm:ss format; maximum time limit for this job
	#SBATCH --mem=60000 # cpu memory size
	#SBATCH --cpus-per-task=8 # number of CPUs
	#SBATCH --output=./logs/gref_m10_mg10_tmp007_4gpu_bs32_ang.log

	# Reset the module environment and load the CUDA 11.3 module.
	# NOTE(review): `ml` is presumably the Lmod shorthand for `module` on this
	# cluster — confirm.
	ml purge
	ml load cuda/11.3

	# Install conda's shell hook in this non-interactive shell so that
	# `conda activate` is available, then switch to the `cris` environment.
	eval "$(conda shell.bash hook)"
	conda activate cris

	# train_angle.py and ./pretrained_weights are referenced by relative path
	# further down, so abort rather than launch from the wrong directory.
	cd /data2/projects/chaeyun/LAVT-RIS/ || {
	  printf 'error: cannot cd to /data2/projects/chaeyun/LAVT-RIS/\n' >&2
	  exit 1
	}

	# gref_m10_mg10_tmp007_4gpu_bs32_ang
	# # sbatch ./scripts/baseline_test_lr_angle2.sh ./models/gref_m10_mg10_tmp007_4gpu_bs32_ang gref_m10_mg10_tmp007_4gpu_bs32_ang 10 0.07 hardpos_only 0.10

	# todo after 241208
	# gref_m10_mg10_tmp010_4gpu_bs32_ang
	# mlw 0.10 margin 8 tmp 0.10 original
	# # bash ./scripts/baseline_test_lr_angle.sh ./models/gref_m10_mg10_tmp010_4gpu_bs32_ang gref_m10_mg10_tmp010_4gpu_bs32_ang 10 0.10 hardpos_only 0.10 > ./logs/gref_m10_mg10_tmp010_4gpu_bs32_ang.log 2>&1


	# # bash ./scripts/baseline_test_lr.sh ./models/gref_m10_mg08_tmp010_4gpu_bs32_ang gref_m10_mg08_tmp010_4gpu_bs32_ang 8 0.10 hardpos_only 0.10 > ./logs/gref_m10_mg08_tmp010_4gpu_bs32_ang.log 2>&1


	# gref_m10_mg12_tmp007_4gpu_bs32_ang
	# gref_m10_mg12_tmp010_4gpu_bs32_ang
	# gref_m10_mg10_tmp007_4gpu_bs32_ang
	# gref_m10_mg10_tmp010_4gpu_bs32_ang
	# gref_m10_mg15_tmp010_4gpu_bs32_ang



	# Runtime switches, exported so the launched Python processes inherit them.
	export NCCL_P2P_DISABLE=1 NVIDIA_TF32_OVERRIDE=0

	# Value passed to the launcher's --nproc_per_node.
	GPUS=4

	# Positional arguments, in call order:
	#   $1 output directory   $2 experiment name (model id)   $3 margin value
	#   $4 temperature        $5 metric mode                  $6 metric loss weight
	OUTPUT_DIR="${1}"
	EXP_NAME="${2}"
	MARGIN="${3}"
	TEMP="${4}"
	MODE="${5}"
	MLW="${6}"


	# # Create the directory if it does not exist
	# if [[ ! -d "${OPT_DIR}/${EXP_NAME}" ]]; then
	# echo "Directory ${OPT_DIR}/${EXP_NAME} does not exist. Creating it..."
	# mkdir -p "${OPT_DIR}/${EXP_NAME}"
	# fi


	# TRAIN
	# hardpos_only, hardpos_only_rev
	# Refuse to launch with a missing argument: an empty value would otherwise
	# produce a malformed command line (e.g. "--output-dir --model_id ...").
	for required_var in OUTPUT_DIR EXP_NAME MARGIN TEMP MODE MLW; do
	  if [[ -z "${!required_var}" ]]; then
	    printf 'error: %s is empty\n' "${required_var}" >&2
	    printf 'usage: %s OUTPUT_DIR EXP_NAME MARGIN TEMP MODE MLW\n' "${0##*/}" >&2
	    exit 2
	  fi
	done
	unset required_var

	# Arguments for train_angle.py, held in an array so that every value reaches
	# the program as exactly one word (no word-splitting or globbing on paths
	# or names).
	python_args=(
	  --model lavt_one
	  --dataset refcocog
	  --splitBy umd
	  --output-dir "${OUTPUT_DIR}"
	  --model_id "${EXP_NAME}"
	  # NOTE(review): presumably per-process (4 processes x 8 = 32, matching
	  # "bs32" in the experiment names) — confirm in train_angle.py.
	  --batch-size 8
	  --lr 0.00005
	  --wd 1e-2
	  --swin_type base
	  --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth
	  --epochs 40
	  --img_size 480
	  --metric_learning
	  --margin_value "${MARGIN}"
	  --temperature "${TEMP}"
	  --metric_mode "${MODE}"
	  --metric_loss_weight "${MLW}"
	  --exclude_multiobj
	)

	# Start one process per GPU through the torch.distributed launcher. This stays
	# the last command so the script's exit status is the launcher's.
	python -m torch.distributed.launch --nproc_per_node="${GPUS}" --master_port=6872 train_angle.py "${python_args[@]}"

	# sbatch ./scripts/baseline_test_lr.sh ./models/gref_hp10_m10_tmp003 gref_hp10_m10_tmp003 10 0.03 hardpos_only_refined
	# sbatch ./scripts/baseline_test_lr.sh ./models/gref_hp10_m10_tmp010 gref_hp10_m10_tmp010 10 0.10 hardpos_only_refined
	# sbatch ./scripts/baseline_test_lr.sh ./models/gref_hp10_m15_tmp005 gref_hp10_m15_tmp005 15 0.05 hardpos_only_refined
	# sbatch ./scripts/baseline_test_lr.sh ./models/gref_hp10_m20_tmp005 gref_hp10_m20_tmp005 20 0.05 hardpos_only_refined


	# python test.py --model lavt_one --swin_type base --dataset refcocog --splitBy umd --split test --resume ./models/gref_umd/lavt_test_lr/gref_testlr_4gpu.pth --workers 4 --ddp_trained_weights --window12 --img_size 480