---
# Training configuration with three top-level sections:
#   model    - which model to build and how to load/train it
#   datasets - per-dataset image/text preprocessing
#   run      - task, schedule, batching, output and device settings

model:
  arch: blip2_opt
  model_type: caption_coco_opt2.7b
  load_finetuned: false
  use_grad_checkpoint: true
  freeze_vit: false

datasets:
  coco_caption:
    # Image preprocessing: train and eval name different processors but share
    # the same image_size.
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 364
      eval:
        name: "blip_image_eval"
        image_size: 364
    # Text preprocessing: only the train processor sets a prompt.
    text_processor:
      train:
        name: "blip_caption"
        # The trailing space is part of the value; it must stay quoted.
        prompt: "a photo of "
      eval:
        name: "blip_caption"

run:
  task: captioning

  # Learning-rate schedule and optimisation settings.
  # Exponent-form floats carry an explicit decimal point so that YAML 1.1
  # loaders (e.g. PyYAML) resolve them as floats instead of strings.
  lr_sched: "linear_warmup_cosine_lr"
  init_lr: 1.0e-5
  min_lr: 0
  warmup_lr: 1.0e-8
  warmup_steps: 1000
  weight_decay: 0.05
  max_epoch: 5
  batch_size_train: 16
  batch_size_eval: 8
  num_workers: 4
  accum_grad_iters: 1

  # Caption length bounds and beam count - presumably decoding settings for
  # the captioning task; confirm units (tokens vs. words) with the consumer.
  max_len: 30
  min_len: 8
  num_beams: 5

  seed: 42
  output_dir: "output/BLIP2/Caption_coco"

  # amp is presumably automatic mixed precision (confirm with the consumer).
  # resume_ckpt_path is explicitly null, i.e. no checkpoint path is set.
  amp: true
  resume_ckpt_path: null

  # Run mode and the dataset split names used for each phase.
  evaluate: false
  train_splits: ["train"]
  valid_splits: ["val"]
  test_splits: ["test"]

  # Device and distributed-launch settings.
  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: true