| DATA: | |
| dataset: refcoco | |
| train_split: train_seen | |
| train_lmdb: path/open_lmdb/refcoco/train_seen.lmdb | |
| val_seen_split: val_seen | |
| val_seen_lmdb: path/open_lmdb/refcoco/val_seen.lmdb | |
| val_unseen_split: val_unseen | |
| val_unseen_lmdb: path/open_lmdb/refcoco/val_unseen.lmdb | |
| mask_root: path/masks/refcoco | |
| TRAIN: | |
| swin_type: base | |
| swin_pretrain: path/swin_base_patch4_window12_384_22k.pth | |
| bert: bert-base-uncased | |
| clip_pretrain: path/pretrain/ViT-L-14-336px.pt | |
| mha: '8-8-8-8' | |
| input_size: 480 | |
| clip_dim: 768 | |
| word_len: 20 | |
| num_token: 2 | |
| word_dim: 768 | |
| vis_dim: 512 | |
| token_dim: 512 | |
| sync_bn: True | |
| dropout: 0. | |
| fusion_drop: 0. | |
| workers: 32 # data loader workers | |
| workers_val: 8 | |
| batch_size: 64 # batch size for training | |
| batch_size_val: 16 # batch size for validation during training, memory and speed tradeoff | |
| start_epoch: 0 | |
| epochs: 1000 | |
| lr_backbone: 5.e-5 | |
| lr_text_encoder: 5.e-5 | |
| lr: 1.e-4 | |
| weight_decay: 1.e-4 | |
| amsgrad: True | |
| manual_seed: 0 | |
| print_freq: 100 | |
| exp_name: open | |
| output_folder: exp/refcoco | |
| save_freq: 1 | |
| weight: | |
| resume: | |
| evaluate: True # evaluate on the validation set; needs extra GPU memory, so a small batch_size_val is recommended | |
| Distributed: | |
| dist_url: tcp://localhost:12345 | |
| dist_backend: 'nccl' | |
| multiprocessing_distributed: True | |
| world_size: 1 | |
| rank: 0 | |
| TEST: | |
| window12: True # set to True for testing if the window12 pretrained weights were used for training | |
| test_split: test_unseen | |
| test_lmdb: path/refcoco/test_unseen.lmdb | |
| visualize: False |