import math

import torch
from torch.utils.data.sampler import Sampler


class EnlargedSampler(Sampler):
    """Sampler that restricts data loading to a subset of the dataset.

    Modified from torch.utils.data.distributed.DistributedSampler.
    Supports enlarging the dataset for iteration-based training, to save
    time when restarting the dataloader after each epoch.

    Args:
        dataset (torch.utils.data.Dataset): Dataset used for sampling.
        num_replicas (int | None): Number of processes participating in
            the training. It is usually the world_size.
        rank (int | None): Rank of the current process within num_replicas.
        ratio (int): Enlarging ratio. Default: 1.
    """

    def __init__(self, dataset, num_replicas, rank, ratio=1):
        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank
        self.epoch = 0
        # Each rank draws `num_samples` indices per epoch; with enlarging
        # ratio r, one sampler epoch covers the dataset roughly r times
        # (e.g. len(dataset)=1000, ratio=100, num_replicas=8 -> 12500).
        self.num_samples = math.ceil(len(self.dataset) * ratio / self.num_replicas)
        self.total_size = self.num_samples * self.num_replicas

    def __iter__(self):
        # Deterministically shuffle based on the current epoch so that all
        # ranks generate the same permutation.
        g = torch.Generator()
        g.manual_seed(self.epoch)
        indices = torch.randperm(self.total_size, generator=g).tolist()

        # Wrap indices around the real dataset size to realize the
        # enlarging ratio.
        dataset_size = len(self.dataset)
        indices = [v % dataset_size for v in indices]

        # Subsample: each rank takes every num_replicas-th index, offset by
        # its own rank, so the ranks partition the enlarged index list.
        indices = indices[self.rank:self.total_size:self.num_replicas]
        assert len(indices) == self.num_samples

        return iter(indices)

    def __len__(self):
        return self.num_samples

    def set_epoch(self, epoch):
        self.epoch = epoch
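

if __name__ == '__main__':
    # Minimal usage sketch (an illustration added here, not part of the
    # original module): a 10-item dataset enlarged 3x on a single process,
    # so one sampler "epoch" yields 30 indices that wrap around the dataset.
    # In distributed training, num_replicas/rank would instead come from the
    # process group (the world_size and this process's rank), not 1 and 0.
    from torch.utils.data import DataLoader, TensorDataset

    dataset = TensorDataset(torch.arange(10))
    sampler = EnlargedSampler(dataset, num_replicas=1, rank=0, ratio=3)
    sampler.set_epoch(0)  # reseed per epoch so each epoch shuffles differently
    print(len(sampler))   # 30 == ceil(10 * 3 / 1)

    loader = DataLoader(dataset, batch_size=4, sampler=sampler)
    for (batch,) in loader:
        print(batch)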