Spaces:
Build error
Build error
| """ | |
| Copyright (c) 2022, salesforce.com, inc. | |
| All rights reserved. | |
| SPDX-License-Identifier: BSD-3-Clause | |
| For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause | |
| """ | |
| import os | |
| from lavis.common.registry import registry | |
| from lavis.datasets.builders.base_dataset_builder import BaseDatasetBuilder | |
| from lavis.datasets.datasets.image_text_pair_datasets import ImageTextPairDataset | |
| from lavis.datasets.datasets.laion_dataset import LaionDataset | |
class ConceptualCaption3MBuilder(BaseDatasetBuilder):
    """Builder declaration for the ConceptualCaption 3M image-text pairs.

    Only class-level settings are defined here; all building logic is
    inherited from ``BaseDatasetBuilder``.
    """

    # Maps a config name to the YAML file describing this dataset.
    DATASET_CONFIG_DICT = dict(
        default="configs/datasets/conceptual_caption/defaults_3m.yaml",
    )

    # Dataset class used for the training split.
    train_dataset_cls = ImageTextPairDataset
class ConceptualCaption12MBuilder(BaseDatasetBuilder):
    """Builder declaration for the ConceptualCaption 12M image-text pairs.

    Only class-level settings are defined here; all building logic is
    inherited from ``BaseDatasetBuilder``.
    """

    # Maps a config name to the YAML file describing this dataset.
    DATASET_CONFIG_DICT = dict(
        default="configs/datasets/conceptual_caption/defaults_12m.yaml",
    )

    # Dataset class used for the training split.
    train_dataset_cls = ImageTextPairDataset
class SBUCaptionBuilder(BaseDatasetBuilder):
    """Builder declaration for the SBU caption image-text pairs.

    Only class-level settings are defined here; all building logic is
    inherited from ``BaseDatasetBuilder``.
    """

    # Maps a config name to the YAML file describing this dataset.
    DATASET_CONFIG_DICT = dict(
        default="configs/datasets/sbu_caption/defaults.yaml",
    )

    # Dataset class used for the training split.
    train_dataset_cls = ImageTextPairDataset
class VGCaptionBuilder(BaseDatasetBuilder):
    """Builder declaration for the VG caption image-text pairs.

    Only class-level settings are defined here; all building logic is
    inherited from ``BaseDatasetBuilder``.
    """

    # Maps a config name to the YAML file describing this dataset.
    DATASET_CONFIG_DICT = dict(
        default="configs/datasets/vg/defaults_caption.yaml",
    )

    # Dataset class used for the training split.
    train_dataset_cls = ImageTextPairDataset
class Laion2BMultiBuilder(BaseDatasetBuilder):
    """Builder for the LAION 2B-multi dataset.

    Unlike the other builders in this module, this one replaces ``build``
    and turns both download hooks into no-ops.
    """

    # Maps a config name to the YAML file describing this dataset.
    DATASET_CONFIG_DICT = dict(
        default="configs/datasets/laion/defaults_2B_multi.yaml",
    )

    # Dataset class used for the training split.
    train_dataset_cls = LaionDataset

    def _download_ann(self):
        """Do nothing: this builder performs no annotation download."""

    def _download_vis(self):
        """Do nothing: this builder performs no visual-data download."""

    def build(self):
        """Build the processors and create the training dataset.

        Returns:
            A dict with the single key ``"train"`` whose value is the
            ``inner_dataset`` attribute of the constructed
            ``train_dataset_cls`` instance.
        """
        self.build_processors()
        build_cfg = self.config.build_info

        # laion dataset only has train split
        split_name = "train"

        # [NOTE] return inner_datasets (wds.DataPipeline)
        wrapper = self.train_dataset_cls(
            vis_processor=self.vis_processors[split_name],
            text_processor=self.text_processors[split_name],
            location=build_cfg.storage,
        )
        return {split_name: wrapper.inner_dataset}