File size: 1,601 Bytes
8d82201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
#SBATCH --job-name=mlw010-or2
#SBATCH --partition=a6000
#SBATCH --gres=gpu:4
#SBATCH --time=13-11:30:00 # d-hh:mm:ss format; max time limit for this job
#SBATCH --mem=64000 # cpu memory size 
#SBATCH --cpus-per-task=12 # number of CPU cores per task
#SBATCH --output=./logs/gref_m05_tmp010_4gpu_bs32_orig.log

# Reset the loaded environment modules, then load the CUDA 11.3 module.
# NOTE(review): `ml` is presumably the Lmod shorthand for `module` — confirm
# for this cluster.
ml purge
ml load cuda/11.3

# Install conda's shell integration into this non-interactive shell so that
# `conda activate` is available, then switch to the "cris" environment.
eval "$(conda shell.bash hook)"
conda activate cris

# train.py and ./pretrained_weights are referenced by relative path further
# down, so the job must run from the project checkout. Abort instead of
# silently continuing from the wrong directory if it is unavailable.
cd /data2/projects/chaeyun/LAVT-RIS/ || {
  printf 'error: cannot cd to %s\n' '/data2/projects/chaeyun/LAVT-RIS/' >&2
  exit 1
}

# todo
# gref_m05_tmp010_4gpu_bs32_orig
# mlw 0.05 margin 10 tmp 0.10 original
# sbatch ./scripts/baseline_test_lr2.sh ./models/gref_m05_tmp010_4gpu_bs32_orig gref_m05_tmp010_4gpu_bs32_orig 10 0.10 hardpos_only 0.05


# gref_m10_tmp005_4gpu_bs32
# margin 10 tmp 0.05 refined
# sbatch ./scripts/baseline_test_lr2.sh ./models/gref_m10_tmp005_4gpu_bs32 gref_m10_tmp005_4gpu_bs32 10 0.05 hardpos_only_refined 0.10




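# Positional arguments: OUTPUT_DIR EXP_NAME MARGIN TEMP MODE MLW
# (margin value, temperature, metric mode, metric loss weight)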

# Launch distributed training (train.py) for the lavt_one model on RefCOCOg
# (UMD split) with metric learning enabled.
#
# Positional arguments:
#   $1  OUTPUT_DIR  forwarded to --output-dir
#   $2  EXP_NAME    forwarded to --model_id
#   $3  MARGIN      forwarded to --margin_value
#   $4  TEMP        forwarded to --temperature
#   $5  MODE        forwarded to --metric_mode (e.g. hardpos_only,
#                   hardpos_only_rev)
#   $6  MLW         forwarded to --metric_loss_weight
#
# Exits with status 2 before starting Python if an argument is missing or
# empty, so a mis-submitted job fails immediately with a clear message.

# Disable NCCL peer-to-peer GPU transport and NVIDIA TF32 math for this job.
export NCCL_P2P_DISABLE=1
export NVIDIA_TF32_OVERRIDE=0

if (( $# < 6 )); then
  printf '%s\n' \
    'usage: sbatch <this script> OUTPUT_DIR EXP_NAME MARGIN TEMP MODE MLW' >&2
  exit 2
fi

# Number of worker processes to start; matches the --gres=gpu:4 request.
GPUS=4
OUTPUT_DIR=$1
EXP_NAME=$2
MARGIN=$3
TEMP=$4
MODE=$5
MLW=$6

# An empty value would be forwarded to train.py as a real (empty) argument,
# so reject it up front.
for required_name in OUTPUT_DIR EXP_NAME MARGIN TEMP MODE MLW; do
  if [[ -z "${!required_name}" ]]; then
    printf 'error: %s must not be empty\n' "$required_name" >&2
    exit 2
  fi
done

# TRAIN
# Arguments for train.py, kept in an array so every value reaches Python as
# exactly one word regardless of spaces or glob characters.
python_args=(
  --model lavt_one
  --dataset refcocog
  --splitBy umd
  --output-dir "$OUTPUT_DIR"
  --model_id "$EXP_NAME"
  # NOTE(review): presumably a per-process batch size; 8 x 4 GPUs would match
  # the "bs32" used in the run names — confirm.
  --batch-size 8
  --lr 0.00005
  --wd 1e-2
  --swin_type base
  --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth
  --epochs 40
  --img_size 480
  --metric_learning
  --margin_value "$MARGIN"
  --temperature "$TEMP"
  --metric_mode "$MODE"
  --hp_selection naive
  --metric_loss_weight "$MLW"
  --exclude_multiobj
)

# The master port is fixed; two jobs from this script on the same node would
# contend for it.
python -m torch.distributed.launch \
  --nproc_per_node="$GPUS" \
  --master_port=3928 \
  train.py "${python_args[@]}"