#SBATCH --job-name=angle2
#SBATCH --partition=a6000
#SBATCH --gres=gpu:4
#SBATCH --time=13-11:30:00 # d-hh:mm:ss format; maximum run time requested for this job
#SBATCH --mem=60000 # CPU memory size (Slurm interprets a bare number as megabytes)
#SBATCH --cpus-per-task=8 # number of CPUs per task
#SBATCH --output=./logs/gref_m10_mg10_tmp007_4gpu_bs32_ang.log

# -----------------------------------------------------------------------------
# Slurm batch script: launches train_angle.py (model "lavt_one", dataset
# refcocog / umd split, metric learning enabled) on 4 GPUs through
# torch.distributed.launch.
#
# Usage:
#   sbatch ./scripts/baseline_test_lr_angle2.sh \
#       OUTPUT_DIR EXP_NAME MARGIN TEMP MODE MLW
#
# Arguments:
#   $1 OUTPUT_DIR  passed to --output-dir
#   $2 EXP_NAME    passed to --model_id
#   $3 MARGIN      passed to --margin_value
#   $4 TEMP        passed to --temperature
#   $5 MODE        passed to --metric_mode (values noted by the author:
#                  hardpos_only, hardpos_only_rev)
#   $6 MLW         passed to --metric_loss_weight
#
# Environment:
#   MASTER_PORT    optional; rendezvous port for torch.distributed.launch
#                  (default 6872). Override it when two jobs may share a node.
#
# NOTE(review): the --output log path above is fixed to one experiment name;
# pass `sbatch --output=...` on the command line to log other experiments
# elsewhere. The ./logs directory must already exist.
# -----------------------------------------------------------------------------

# --- Experiment history / notes kept from the original author -----------------
# gref_m10_mg10_tmp007_4gpu_bs32_ang
# # sbatch ./scripts/baseline_test_lr_angle2.sh ./models/gref_m10_mg10_tmp007_4gpu_bs32_ang gref_m10_mg10_tmp007_4gpu_bs32_ang 10 0.07 hardpos_only 0.10
# todo after 241208
# gref_m10_mg10_tmp010_4gpu_bs32_ang
# mlw 0.10 margin 8 tmp 0.10 original
# # bash ./scripts/baseline_test_lr_angle.sh ./models/gref_m10_mg10_tmp010_4gpu_bs32_ang gref_m10_mg10_tmp010_4gpu_bs32_ang 10 0.10 hardpos_only 0.10 > ./logs/gref_m10_mg10_tmp010_4gpu_bs32_ang.log 2>&1
# # bash ./scripts/baseline_test_lr.sh ./models/gref_m10_mg08_tmp010_4gpu_bs32_ang gref_m10_mg08_tmp010_4gpu_bs32_ang 8 0.10 hardpos_only 0.10 > ./logs/gref_m10_mg08_tmp010_4gpu_bs32_ang.log 2>&1
# gref_m10_mg12_tmp007_4gpu_bs32_ang
# gref_m10_mg12_tmp010_4gpu_bs32_ang
# gref_m10_mg10_tmp007_4gpu_bs32_ang
# gref_m10_mg10_tmp010_4gpu_bs32_ang
# gref_m10_mg15_tmp010_4gpu_bs32_ang

# Print an error message to stderr and abort the job.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print the expected invocation to stderr and abort with status 2.
usage() {
  printf 'Usage: sbatch ./scripts/baseline_test_lr_angle2.sh %s\n' \
    'OUTPUT_DIR EXP_NAME MARGIN TEMP MODE MLW' >&2
  exit 2
}

# Fail fast: a missing or empty argument would otherwise produce a malformed
# command line (e.g. "--output-dir --model_id ...") only after the job has
# already been scheduled on the GPUs.
(( $# >= 6 )) || usage
for required_arg in "$1" "$2" "$3" "$4" "$5" "$6"; do
  [[ -n "${required_arg}" ]] || usage
done
unset required_arg

# --- Environment setup ---------------------------------------------------------
ml purge
ml load cuda/11.3
eval "$(conda shell.bash hook)"
conda activate cris || die "failed to activate conda environment 'cris'"
# train_angle.py and ./pretrained_weights are resolved relative to this
# directory, so do not continue if it is unavailable.
cd /data2/projects/chaeyun/LAVT-RIS/ \
  || die "cannot cd to /data2/projects/chaeyun/LAVT-RIS/"

# Disable NCCL's peer-to-peer transport between GPUs.
export NCCL_P2P_DISABLE=1
# Force NVIDIA libraries not to use TF32 for FP32 computations.
export NVIDIA_TF32_OVERRIDE=0

# --- Run configuration -----------------------------------------------------------
GPUS=4 # processes per node; matches --gres=gpu:4 above
MASTER_PORT=${MASTER_PORT:-6872}
OUTPUT_DIR=$1
EXP_NAME=$2
MARGIN=$3
TEMP=$4
MODE=$5
MLW=$6

# Disabled by the original author (note: OPT_DIR is not defined in this script):
# # Create the directory if it does not exist
# if [[ ! -d "${OPT_DIR}/${EXP_NAME}" ]]; then
# echo "Directory ${OPT_DIR}/${EXP_NAME} does not exist. Creating it..."
# mkdir -p "${OPT_DIR}/${EXP_NAME}"
# fi

# TRAIN
# hardpos_only, hardpos_only_rev
# The arguments are held in an array so each value reaches Python as exactly
# one argument, regardless of spaces or glob characters it may contain.
python_args=(
  --model lavt_one
  --dataset refcocog
  --splitBy umd
  --output-dir "${OUTPUT_DIR}"
  --model_id "${EXP_NAME}"
  # presumably the per-process batch size (8 x 4 GPUs = the "bs32" in the
  # experiment names) -- confirm against train_angle.py
  --batch-size 8
  --lr 0.00005
  --wd 1e-2
  --swin_type base
  --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth
  --epochs 40
  --img_size 480
  --metric_learning
  --margin_value "${MARGIN}"
  --temperature "${TEMP}"
  --metric_mode "${MODE}"
  --metric_loss_weight "${MLW}"
  --exclude_multiobj
)

python -m torch.distributed.launch \
  --nproc_per_node="${GPUS}" \
  --master_port="${MASTER_PORT}" \
  train_angle.py "${python_args[@]}"

# --- Earlier invocations kept for reference --------------------------------------
# sbatch ./scripts/baseline_test_lr.sh ./models/gref_hp10_m10_tmp003 gref_hp10_m10_tmp003 10 0.03 hardpos_only_refined
# sbatch ./scripts/baseline_test_lr.sh ./models/gref_hp10_m10_tmp010 gref_hp10_m10_tmp010 10 0.10 hardpos_only_refined
# sbatch ./scripts/baseline_test_lr.sh ./models/gref_hp10_m15_tmp005 gref_hp10_m15_tmp005 15 0.05 hardpos_only_refined
# sbatch ./scripts/baseline_test_lr.sh ./models/gref_hp10_m20_tmp005 gref_hp10_m20_tmp005 20 0.05 hardpos_only_refined
# python test.py --model lavt_one --swin_type base --dataset refcocog --splitBy umd --split test --resume ./models/gref_umd/lavt_test_lr/gref_testlr_4gpu.pth --workers 4 --ddp_trained_weights --window12 --img_size 480