# instructblip/lavis/configs/datasets/textcaps/defaults_instruct.yaml
# Thien Huynh
# Initialization
# be13417
# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
# Dataset configuration for the `textcaps_caption_instruct` builder.
# Layout: datasets -> <builder name> -> data type, processors, and build_info
# (annotation files per split plus the image storage location).
datasets:
  textcaps_caption_instruct:  # name of the dataset builder
    # data_dir: ${env.data_dir}/datasets
    data_type: images  # [images|videos|features]

    # Image processors, selected by name, for the train and eval phases.
    vis_processor:
      train:
        name: clip_image_train
        image_size: 224
      eval:
        name: clip_image_eval
        image_size: 224

    # Text processors; only the train processor takes modality/task options.
    text_processor:
      train:
        name: blip_instruction
        modality: image
        task: caption
      eval:
        name: blip_caption

    build_info:
      # The split names below (train/val/test) must stay mapping keys: do not
      # prefix them with a minus sign (-), which would turn them into list items.
      annotations:
        # Each split gives a list of source URLs and a list of storage paths.
        # NOTE(review): presumably url[i] is saved to storage[i] — confirm
        # against the builder that consumes this file.
        train:
          url:
            - https://dl.fbaipublicfiles.com/textvqa/data/textcaps/TextCaps_0.1_train.json
          storage:
            - TextCaps/TextCaps_0.1_train.json
        val:
          url:
            - https://dl.fbaipublicfiles.com/textvqa/data/textcaps/TextCaps_0.1_val.json
          storage:
            - TextCaps/TextCaps_0.1_val.json
        test:
          url:
            - https://dl.fbaipublicfiles.com/textvqa/data/textcaps/TextCaps_0.1_test.json
          storage:
            - TextCaps/TextCaps_0.1_test.json
      images:
        # NOTE(review): the commented-out value below looks like a leftover from
        # a nocaps config — confirm before re-enabling.
        # storage: nocaps/images
        # NOTE(review): absolute, machine-specific path; presumably has to be
        # overridden on any host that lacks this directory.
        storage: /export/share/datasets/vision_language/TextCaps/images