# BLIP/configs/vqa.yaml
vqa_root: '/kaggle/working/vision/mscoco'       # followed by train2014/
vg_root: '/kaggle/working/vision/visual-genome' # followed by image/
train_files: ['vqa_train','vqa_val','vg_qa']
ann_root: 'annotation'
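# note (an assumption based on BLIP's data/vqa_dataset.py, not stated in this file):
# the annotation JSONs named in train_files are downloaded into ann_root on first run if missing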
# set pretrained to a local file path or a URL
pretrained: 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth'
# size of the ViT backbone: 'base' or 'large'
vit: 'base'
batch_size_train: 8
batch_size_test: 32
vit_grad_ckpt: False
vit_ckpt_layer: 0
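# note (hedged, from BLIP's ViT options): vit_grad_ckpt enables gradient checkpointing
# in the ViT to trade compute for memory; vit_ckpt_layer is the first layer it applies to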
init_lr: 2e-5
image_size: 480
k_test: 128
inference: 'rank'
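# note (hedged): BLIP's VQA evaluation supports 'rank' and 'generate'; with 'rank',
# the model scores the k_test most likely candidate answers rather than decoding free-form text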
# optimizer and learning-rate schedule
weight_decay: 0.05
min_lr: 0
max_epoch: 10
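# usage (a minimal sketch, not part of the original config; assumes BLIP's train_vqa.py
# entry point and PyYAML; kept as comments so this file stays valid YAML):
#   import yaml
#   with open('configs/vqa.yaml') as f:
#       config = yaml.load(f, Loader=yaml.Loader)
#   print(config['pretrained'], config['image_size'])  # -> checkpoint URL, 480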