Spaces:
Runtime error
Runtime error
| # from .processors.builder import build_processors | |
| from .xgpt3_dataset import MultiModalDataset | |
| from mplug_owl_video.processing_mplug_owl import MplugOwlImageProcessor, MplugOwlProcessor | |
def train_valid_test_datasets_provider(data_path, config, tokenizer, seq_length=1024, loss_objective='sequential'):
    """Create the training and validation datasets for mPLUG-Owl.

    This is a thin wrapper that logs progress and delegates the actual
    construction to ``build_train_valid_test_datasets``.

    Args:
        data_path: Forwarded as ``input_file`` to the builder.
        config: Forwarded unchanged as ``config``.
        tokenizer: Forwarded unchanged as ``tokenizer``.
        seq_length: Forwarded as ``max_length``.
        loss_objective: Forwarded unchanged as ``loss_objective``.

    Returns:
        A ``(train_ds, valid_ds)`` tuple as produced by the builder.
    """
    print('> building train and validation datasets for mPLUG-Owl ...')

    # Collect the builder arguments by keyword so the mapping from this
    # function's parameter names to the builder's names is explicit.
    builder_kwargs = dict(
        input_file=data_path,
        tokenizer=tokenizer,
        max_length=seq_length,
        config=config,
        loss_objective=loss_objective,
    )
    train_split, valid_split = build_train_valid_test_datasets(**builder_kwargs)

    print("> finished creating mPLUG-Owl datasets ...")
    return train_split, valid_split
def build_train_valid_test_datasets(input_file, tokenizer, max_length=80, config=None, loss_objective='sequential'):
    """Build the train and validation ``MultiModalDataset`` objects.

    Args:
        input_file: Sequence of exactly two entries; ``input_file[0]`` is
            used for the training dataset and ``input_file[1]`` for the
            validation dataset.
        tokenizer: Tokenizer handed both to ``MplugOwlProcessor`` and to
            each ``MultiModalDataset``.
        max_length: Passed positionally to ``MultiModalDataset`` after the
            processor.
        config: Mapping that must provide ``'pretrained_ckpt'``, the value
            given to ``MplugOwlImageProcessor.from_pretrained``.
        loss_objective: Forwarded as a keyword to ``MultiModalDataset``.
            It was previously read in the body without being a parameter,
            which raised ``NameError`` (and ``TypeError`` for callers that
            passed it by keyword).

    Returns:
        A ``(train_ds, valid_ds)`` tuple.

    Raises:
        AssertionError: If ``input_file`` does not contain exactly two
            entries.
    """
    # Validate before loading the pretrained processor so that a bad
    # argument fails immediately instead of after the checkpoint load.
    # If you have more than two files, modify the code here or merge them
    # into a train file and a dev file.
    assert len(input_file) == 2, (
        "input_file must contain exactly two entries: [train, valid]"
    )
    train_file, valid_file = input_file

    image_processor = MplugOwlImageProcessor.from_pretrained(config['pretrained_ckpt'])
    processor = MplugOwlProcessor(image_processor, tokenizer)

    train_ds = MultiModalDataset(
        train_file, tokenizer, processor, max_length, loss_objective=loss_objective
    )
    valid_ds = MultiModalDataset(
        valid_file, tokenizer, processor, max_length, loss_objective=loss_objective
    )
    return (train_ds, valid_ds)