| """ | |
| Copyright (c) 2022, salesforce.com, inc. | |
| All rights reserved. | |
| SPDX-License-Identifier: BSD-3-Clause | |
| For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause | |
| """ | |
# NOTE(review): `registry` is imported but no @registry.register_builder
# decorators appear in this chunk — they were likely stripped during
# extraction; confirm against the upstream file before removing the import.
from lavis.common.registry import registry
from lavis.datasets.builders.base_dataset_builder import (
    BaseDatasetBuilder,
    MultiModalDatasetBuilder,
)
from lavis.datasets.datasets.capfilt_dataset import (
    CapFiltCaptionDataset,
    CapFiltCaptionInstructDataset,
)
from lavis.datasets.datasets.coco_caption_datasets import (
    COCOCapDataset,
    COCOCapEvalDataset,
    COCOCapInstructDataset,
    NoCapsEvalDataset,
)
from lavis.datasets.datasets.textcaps_datasets import (
    TextCapsCapDataset,
    TextCapsCapEvalDataset,
    TextCapsCapInstructDataset,
)
from lavis.datasets.datasets.valor_caption import (
    VALORCaptionDataset,
    VALORCaptionEvalDataset,
    VALORCaptionInstuctDataset,
)
from lavis.datasets.datasets.vatex_captioning_datasets import (
    VATEXCaptionDataset,
    VATEXCaptionEvalDataset,
    VATEXCaptionInstuctDataset,
)
from lavis.datasets.datasets.video_caption_datasets import (
    ClipCaptionDataset,
    ClipCaptionEvalDataset,
    ClipCaptionInstructDataset,
    VideoCaptionDataset,
    VideoCaptionEvalDataset,
    VideoCaptionInstructDataset,
    WebVideoCaptionDataset,
    WebVideoCaptionInstructDataset,
)
from lavis.datasets.datasets.violin_dataset import (
    ViolinVideoCaptionDataset,
    ViolinVideoCaptionEvalDataset,
    ViolinVideoCaptionInstructDataset,
)
from lavis.datasets.datasets.vlep_dataset import (
    VlepVideoDataset,
    VlepVideoEvalDataset,
    VlepVideoInstructDataset,
)
from lavis.datasets.datasets.vsr_datasets import (
    VSRCaptionDataset,
    VSRCaptionEvalDataset,
    VSRCaptionInstructDataset,
)
class COCOCapBuilder(BaseDatasetBuilder):
    """Builder for the COCO image-captioning dataset (train + eval splits)."""

    train_dataset_cls = COCOCapDataset
    eval_dataset_cls = COCOCapEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/coco/defaults_cap.yaml"}
class COCOCapInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted COCO captioning dataset."""

    train_dataset_cls = COCOCapInstructDataset
    eval_dataset_cls = COCOCapEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/coco/defaults_cap_instruct.yaml"}
class Flickr30kCapBuilder(BaseDatasetBuilder):
    """Builder for Flickr30k captioning.

    Reuses the COCO dataset classes; only the YAML config (annotation and
    image paths) differs.
    """

    train_dataset_cls = COCOCapDataset
    eval_dataset_cls = COCOCapEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/flickr30k/defaults_cap.yaml"}
class Flickr30kCapInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted Flickr30k captioning dataset."""

    train_dataset_cls = COCOCapInstructDataset
    eval_dataset_cls = COCOCapEvalDataset

    # NOTE(review): "instuct" looks like a typo, but the on-disk YAML may be
    # named this way — verify the actual config filename before correcting.
    DATASET_CONFIG_DICT = {"default": "configs/datasets/flickr30k/defaults_cap_instuct.yaml"}
class NoCapsBuilder(BaseDatasetBuilder):
    """Builder for the NoCaps captioning benchmark (evaluation only).

    Renamed from a duplicate ``COCOCapBuilder`` definition: this class uses
    ``NoCapsEvalDataset`` and the nocaps config, yet reused the COCO builder's
    name and so silently shadowed the real COCO builder defined earlier in
    the module.
    """

    # NoCaps is an evaluation-only benchmark; no train_dataset_cls is defined.
    eval_dataset_cls = NoCapsEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/nocaps/defaults.yaml"}
class VSRCapBuilder(BaseDatasetBuilder):
    """Builder for VSR (Visual Spatial Reasoning) captioning."""

    train_dataset_cls = VSRCaptionDataset
    eval_dataset_cls = VSRCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/vsr/defaults.yaml"}
class VSRCapInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted VSR captioning dataset."""

    train_dataset_cls = VSRCaptionInstructDataset
    eval_dataset_cls = VSRCaptionEvalDataset

    # NOTE(review): points at the same YAML as VSRCapBuilder (no _instruct
    # variant) — presumably intentional, but confirm a dedicated instruct
    # config was not meant here.
    DATASET_CONFIG_DICT = {"default": "configs/datasets/vsr/defaults.yaml"}
class TextCapsCapBuilder(BaseDatasetBuilder):
    """Builder for the TextCaps scene-text captioning dataset."""

    train_dataset_cls = TextCapsCapDataset
    eval_dataset_cls = TextCapsCapEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/textcaps/defaults.yaml"}
class TextCapsCapInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted TextCaps captioning dataset."""

    train_dataset_cls = TextCapsCapInstructDataset
    eval_dataset_cls = TextCapsCapEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/textcaps/defaults_instruct.yaml"}
class CapFiltCapBuilder(BaseDatasetBuilder):
    """Builder for the CapFilt-14M synthetic caption corpus (train only)."""

    train_dataset_cls = CapFiltCaptionDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/capfilt14m/defaults_cap.yaml"}
class CapFiltCapInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted CapFilt-14M captions (train only).

    Renamed from a duplicate ``CapFiltCapBuilder`` definition that shadowed
    the non-instruct CapFilt builder defined just above; the new name follows
    the ``*InstructBuilder`` convention used throughout this module.
    """

    train_dataset_cls = CapFiltCaptionInstructDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/capfilt14m/defaults_cap_instruct.yaml"}
class MSRVTTCapBuilder(BaseDatasetBuilder):
    """Builder for MSR-VTT video captioning."""

    train_dataset_cls = VideoCaptionDataset
    eval_dataset_cls = VideoCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/msrvtt/defaults_cap.yaml"}
class MSVDCapBuilder(BaseDatasetBuilder):
    """Builder for MSVD video captioning."""

    train_dataset_cls = VideoCaptionDataset
    eval_dataset_cls = VideoCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/msvd/defaults_cap.yaml"}
class VATEXCapBuilder(MultiModalDatasetBuilder):
    """Builder for VATEX captioning (multi-modal: video + audio)."""

    train_dataset_cls = VATEXCaptionDataset
    eval_dataset_cls = VATEXCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/vatex/defaults_cap.yaml"}
class MSRVTTCapInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted MSR-VTT captioning dataset."""

    train_dataset_cls = VideoCaptionInstructDataset
    eval_dataset_cls = VideoCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/msrvtt/defaults_cap_instruct.yaml"}
class MSVDCapInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted MSVD captioning dataset."""

    train_dataset_cls = VideoCaptionInstructDataset
    eval_dataset_cls = VideoCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/msvd/defaults_cap_instruct.yaml"}
class VATEXCapInstructBuilder(MultiModalDatasetBuilder):
    """Builder for the instruction-formatted VATEX captioning dataset."""

    # "Instuct" spelling matches the imported dataset class name.
    train_dataset_cls = VATEXCaptionInstuctDataset
    eval_dataset_cls = VATEXCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/vatex/defaults_cap_instruct.yaml"}
class WebVid2MCapBuilder(BaseDatasetBuilder):
    """Builder for the WebVid-2M web video caption corpus (train only)."""

    train_dataset_cls = WebVideoCaptionDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/webvid/defaults_cap.yaml"}
class WebVid2MCapInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted WebVid-2M captions (train only)."""

    train_dataset_cls = WebVideoCaptionInstructDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/webvid/defaults_cap_instruct.yaml"}
class ViolinCapBuilder(BaseDatasetBuilder):
    """Builder for VIOLIN video captioning."""

    train_dataset_cls = ViolinVideoCaptionDataset
    eval_dataset_cls = ViolinVideoCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/violin/defaults_cap.yaml"}
class ViolinCapInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted VIOLIN captioning dataset."""

    train_dataset_cls = ViolinVideoCaptionInstructDataset
    eval_dataset_cls = ViolinVideoCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/violin/defaults_cap_instruct.yaml"}
class VALORCaptionBuilder(MultiModalDatasetBuilder):
    """Builder for VALOR multi-modal (audio-visual) captioning."""

    train_dataset_cls = VALORCaptionDataset
    eval_dataset_cls = VALORCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/valor/defaults_mm_cap.yaml"}
class VALORCaptionInstructBuilder(MultiModalDatasetBuilder):
    """Builder for the instruction-formatted VALOR captioning dataset."""

    # "Instuct" spelling matches the imported dataset class name.
    train_dataset_cls = VALORCaptionInstuctDataset
    eval_dataset_cls = VALORCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/valor/defaults_mm_cap_instruct.yaml"}
class VlepCaptionBuilder(BaseDatasetBuilder):
    """Builder for VLEP video captioning."""

    train_dataset_cls = VlepVideoDataset
    eval_dataset_cls = VlepVideoEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/vlep/defaults_cap.yaml"}
class VlepCaptionInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted VLEP captioning dataset."""

    train_dataset_cls = VlepVideoInstructDataset
    eval_dataset_cls = VlepVideoEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/vlep/defaults_cap_instruct.yaml"}
class YouCookCaptionBuilder(BaseDatasetBuilder):
    """Builder for YouCook clip-level captioning."""

    train_dataset_cls = ClipCaptionDataset
    eval_dataset_cls = ClipCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/youcook/defaults_cap.yaml"}
class YouCookCaptionInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted YouCook captioning dataset."""

    train_dataset_cls = ClipCaptionInstructDataset
    eval_dataset_cls = ClipCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/youcook/defaults_cap_instruct.yaml"}
class COINCaptionBuilder(BaseDatasetBuilder):
    """Builder for COIN clip-level captioning."""

    train_dataset_cls = ClipCaptionDataset
    eval_dataset_cls = ClipCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/coin/defaults_cap.yaml"}
class COINCaptionInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted COIN captioning dataset."""

    train_dataset_cls = ClipCaptionInstructDataset
    eval_dataset_cls = ClipCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/coin/defaults_cap_instruct.yaml"}
class CharadeCaptionBuilder(BaseDatasetBuilder):
    """Builder for Charades clip-level captioning."""

    train_dataset_cls = ClipCaptionDataset
    eval_dataset_cls = ClipCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/charade/defaults_cap.yaml"}
class CharadeCaptionInstructBuilder(BaseDatasetBuilder):
    """Builder for the instruction-formatted Charades captioning dataset."""

    train_dataset_cls = ClipCaptionInstructDataset
    eval_dataset_cls = ClipCaptionEvalDataset

    DATASET_CONFIG_DICT = {"default": "configs/datasets/charade/defaults_cap_instruct.yaml"}