# Settings for a VQA run: dataset locations, pretrained checkpoint,
# model size, batch sizes, and optimizer hyper-parameters.

# Dataset locations.
vqa_root: '/kaggle/working/vision/mscoco'  # followed by train2014/
vg_root: '/kaggle/working/vision/visual-genome'  # followed by image/
train_files: ['vqa_train', 'vqa_val', 'vg_qa']
ann_root: 'annotation'

# Set pretrained as a file path or a URL.
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth'

# Size of the ViT model: 'base' or 'large'.
vit: 'base'
batch_size_train: 8
batch_size_test: 32
vit_grad_ckpt: false
vit_ckpt_layer: 0
# Written with an explicit decimal point so that YAML 1.1 loaders
# (e.g. PyYAML) resolve it as a float instead of the string '2e-5'.
init_lr: 2.0e-5
image_size: 480

# NOTE(review): k_test presumably limits the candidates considered when
# inference is 'rank' -- confirm against the code that reads this file.
k_test: 128
inference: 'rank'

# Optimizer.
weight_decay: 0.05
min_lr: 0
max_epoch: 10