| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
"""
Multi-crop dataset utilities: multi-crop data augmentation, dataset/dataloader
construction, and a CUDA data prefetcher.
"""
| | import copy |
| | import random |
| |
|
| | import torch |
| | import torchvision.transforms as transforms |
| | from PIL import Image, ImageFilter, ImageOps |
| | from src.dataset import ImageFolder |
| | from src.RandAugment import rand_augment_transform |
| | from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD |
| | from timm.data.random_erasing import RandomErasing |
| | from timm.data.transforms import _pil_interp |
| |
|
| |
|
class GaussianBlur(object):
    """Randomly blur a PIL image with a Gaussian kernel of random radius."""

    def __init__(self, p=0.5, radius_min=0.1, radius_max=2.0):
        # Probability of applying the blur, and the radius sampling range.
        self.prob = p
        self.radius_min = radius_min
        self.radius_max = radius_max

    def __call__(self, img):
        # Skip the augmentation for this sample with probability 1 - prob.
        if random.random() > self.prob:
            return img
        radius = random.uniform(self.radius_min, self.radius_max)
        return img.filter(ImageFilter.GaussianBlur(radius=radius))
| |
|
| |
|
class Solarization(object):
    """Randomly apply PIL solarization (invert all pixels above a threshold)."""

    def __init__(self, p):
        # p: probability of solarizing the input image.
        self.p = p

    def __call__(self, img):
        return ImageOps.solarize(img) if random.random() < self.p else img
| |
|
| |
|
def strong_transforms(
    img_size=224,
    scale=(0.08, 1.0),
    ratio=(0.75, 1.3333333333333333),
    hflip=0.5,
    vflip=0.0,
    color_jitter=0.4,
    auto_augment="rand-m9-mstd0.5-inc1",
    interpolation="random",
    use_prefetcher=True,
    mean=IMAGENET_DEFAULT_MEAN,
    std=IMAGENET_DEFAULT_STD,
    re_prob=0.25,
    re_mode="pixel",
    re_count=1,
    re_num_splits=0,
    color_aug=False,
    strong_ratio=0.45,
):
    """
    Build the 'strong' augmentation pipeline (DeiT-style AutoAugment).

    Pipeline order: random flips -> AutoAugment (+ optional color jitter and
    grayscale when `color_aug`) -> RandomResizedCrop (BICUBIC) -> ToTensor
    (plus Normalize when not using a prefetcher) -> optional RandomErasing.

    for use in a mixing dataset that passes
    * all data through the first (primary) transform, called the 'clean' data
    * a portion of the data through the secondary transform
    * normalizes and converts the branches above with the third, final transform

    Args:
        img_size: output crop size (int or (h, w) tuple).
        scale / ratio: RandomResizedCrop scale and aspect-ratio ranges.
        hflip / vflip: flip probabilities (0 disables the flip).
        color_jitter: only checked for not-None; the jitter strengths below
            are hard-coded (DINO defaults).
        auto_augment: AutoAugment policy string; only "rand*" is handled.
        interpolation: interpolation name passed into the AutoAugment params
            (the final crop always uses BICUBIC).
        use_prefetcher: when True, skip CPU Normalize (done later on GPU).
        mean / std: normalization statistics.
        re_prob / re_mode / re_count / re_num_splits: RandomErasing settings.
        color_aug: enable the extra color jitter / grayscale stage.
        strong_ratio: translate_const ratio for the AutoAugment policy.

    Returns:
        A torchvision transforms.Compose implementing the pipeline.
    """
    # Fall back to ImageNet defaults when scale/ratio are falsy.
    scale = tuple(scale or (0.08, 1.0))
    ratio = tuple(ratio or (3.0 / 4.0, 4.0 / 3.0))

    primary_tfl = []
    if hflip > 0.0:
        primary_tfl += [transforms.RandomHorizontalFlip(p=hflip)]
    if vflip > 0.0:
        primary_tfl += [transforms.RandomVerticalFlip(p=vflip)]

    secondary_tfl = []
    if auto_augment:
        assert isinstance(auto_augment, str)
        img_size_min = min(img_size) if isinstance(img_size, tuple) else img_size
        aa_params = dict(
            translate_const=int(img_size_min * strong_ratio),
            img_mean=tuple([min(255, round(255 * x)) for x in mean]),
        )
        if interpolation and interpolation != "random":
            aa_params["interpolation"] = _pil_interp(interpolation)
        if auto_augment.startswith("rand"):
            secondary_tfl += [rand_augment_transform(auto_augment, aa_params)]
    if color_jitter is not None and color_aug:
        # DINO-style color augmentation appended after AutoAugment.
        secondary_tfl += [
            transforms.RandomApply(
                [
                    transforms.ColorJitter(
                        brightness=0.4, contrast=0.4, saturation=0.2, hue=0.1
                    )
                ],
                p=0.8,
            ),
            transforms.RandomGrayscale(p=0.2),
        ]

    # CLEANUP: the previous version resolved `interpolation` into PIL enums
    # here but never used the result — the crop below always uses BICUBIC.
    final_tfl = [
        transforms.RandomResizedCrop(
            size=img_size, scale=scale, ratio=ratio, interpolation=Image.BICUBIC
        ),
        transforms.ToTensor(),
    ]
    if not use_prefetcher:
        # With a prefetcher, normalization is performed later on GPU.
        final_tfl += [
            transforms.Normalize(mean=torch.tensor(mean), std=torch.tensor(std))
        ]
    if re_prob > 0.0:
        final_tfl.append(
            RandomErasing(
                re_prob,
                mode=re_mode,
                max_count=re_count,
                num_splits=re_num_splits,
                device="cpu",
            )
        )
    return transforms.Compose(primary_tfl + secondary_tfl + final_tfl)
| |
|
| |
|
class DataAugmentation(object):
    """
    Implement multi-crop data augmentation (DINO-style, optionally mixed with
    the DeiT AutoAugment "strong" pipeline).

    Each call produces: 2 clean global (224-sized) crops, 2 extra global crops
    that are either weak or strong, and `local_crops_number` local (96-sized)
    crops.

    --global_crops_scale: scale range of the 224-sized cropped image before resizing
    --local_crops_scale: scale range of the 96-sized cropped image before resizing
    --local_crops_number: number of small local views to generate
    --prob: probability of using *weak* augmentation for the extra crops;
      strong augmentation is applied with probability (1 - prob)
    --vanilla_weak_augmentation: whether we use the same augmentation as DINO,
      namely only weak augmentation (the clean crops are duplicated)
    --color_aug: after AutoAugment, whether we further perform color augmentation
    --local_crop_size: sequence whose first entry is the local crop size (must be 96)
    --timm_auto_augment_par: the parameters for the AutoAugment used in DeiT
    --strong_ratio: the ratio of image augmentation for the AutoAugment used in DeiT
    --re_prob: the re-prob parameter of image augmentation for the AutoAugment in DeiT
    --use_prefetcher: whether we use a prefetcher (normalization then happens on GPU)
    """

    def __init__(
        self,
        global_crops_scale,
        local_crops_scale,
        local_crops_number,
        prob=0.5,
        vanilla_weak_augmentation=False,
        color_aug=False,
        local_crop_size=(96,),
        timm_auto_augment_par="rand-m9-mstd0.5-inc1",
        strong_ratio=0.45,
        re_prob=0.25,
        use_prefetcher=False,
    ):
        # FIX: local_crop_size previously defaulted to a mutable list ([96]);
        # a tuple avoids the shared-mutable-default pitfall and is
        # backward-compatible because only local_crop_size[0] is ever read.
        self.prob = prob
        self.vanilla_weak_augmentation = vanilla_weak_augmentation

        # Shared weak photometric augmentation (DINO): flip + jitter + grayscale.
        flip_and_color_jitter = transforms.Compose(
            [
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomApply(
                    [
                        transforms.ColorJitter(
                            brightness=0.4, contrast=0.4, saturation=0.2, hue=0.1
                        )
                    ],
                    p=0.8,
                ),
                transforms.RandomGrayscale(p=0.2),
            ]
        )

        # With a prefetcher, normalization happens later on GPU, so only
        # ToTensor is applied here; otherwise normalize on CPU.
        if use_prefetcher:
            normalize = transforms.Compose([transforms.ToTensor()])
        else:
            normalize = transforms.Compose(
                [
                    transforms.ToTensor(),
                    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
                ]
            )

        # First clean global crop: always Gaussian-blurred.
        self.global_transfo1 = transforms.Compose(
            [
                transforms.RandomResizedCrop(
                    224, scale=global_crops_scale, interpolation=Image.BICUBIC
                ),
                flip_and_color_jitter,
                GaussianBlur(1.0),
                normalize,
            ]
        )

        # Second clean global crop: occasional blur plus solarization.
        self.global_transfo2 = transforms.Compose(
            [
                transforms.RandomResizedCrop(
                    224, scale=global_crops_scale, interpolation=Image.BICUBIC
                ),
                flip_and_color_jitter,
                GaussianBlur(0.1),
                Solarization(0.2),
                normalize,
            ]
        )

        # Strong global augmentation (DeiT AutoAugment pipeline).
        self.global_transfo3 = strong_transforms(
            img_size=224,
            scale=global_crops_scale,
            ratio=(0.75, 1.3333333333333333),
            hflip=0.5,
            vflip=0.0,
            color_jitter=0.4,
            auto_augment=timm_auto_augment_par,
            interpolation="random",
            use_prefetcher=use_prefetcher,
            mean=IMAGENET_DEFAULT_MEAN,
            std=IMAGENET_DEFAULT_STD,
            re_prob=re_prob,
            re_mode="pixel",
            re_count=1,
            re_num_splits=0,
            color_aug=color_aug,
            strong_ratio=strong_ratio,
        )

        self.local_crops_number = local_crops_number
        # Only the 96-sized local crop configuration is supported.
        assert local_crop_size[0] == 96

        # Weak local augmentation.
        self.local_transfo = transforms.Compose(
            [
                transforms.RandomResizedCrop(
                    local_crop_size[0],
                    scale=local_crops_scale,
                    interpolation=Image.BICUBIC,
                ),
                flip_and_color_jitter,
                GaussianBlur(p=0.5),
                normalize,
            ]
        )

        # Strong local augmentation (DeiT AutoAugment pipeline).
        self.local_transfo2 = strong_transforms(
            img_size=local_crop_size[0],
            scale=local_crops_scale,
            ratio=(0.75, 1.3333333333333333),
            hflip=0.5,
            vflip=0.0,
            color_jitter=0.4,
            auto_augment=timm_auto_augment_par,
            interpolation="random",
            use_prefetcher=use_prefetcher,
            mean=IMAGENET_DEFAULT_MEAN,
            std=IMAGENET_DEFAULT_STD,
            re_prob=re_prob,
            re_mode="pixel",
            re_count=1,
            re_num_splits=0,
            color_aug=color_aug,
            strong_ratio=strong_ratio,
        )

    def __call__(self, image):
        """
        Implement multi-crop data augmentation: generate two clean 224-sized
        crops, two extra 224-sized crops (weak or strong), and
        `local_crops_number` 96-sized crops.

        Returns:
            (crops, weak_flag): the list of views, and True when the extra
            global crops were weakly augmented (or duplicated).
        """
        crops = []
        img1 = self.global_transfo1(image)
        img2 = self.global_transfo2(image)
        crops.append(img1)
        crops.append(img2)

        weak_flag = False

        if self.vanilla_weak_augmentation:
            # Vanilla DINO: reuse the clean crops verbatim (deep copies so
            # downstream in-place ops cannot alias the clean views).
            crops.append(copy.deepcopy(img1))
            crops.append(copy.deepcopy(img2))
            weak_flag = True
        elif self.prob < 1.0 and random.random() > self.prob:
            # Strong branch, taken with probability (1 - prob).
            crops.append(self.global_transfo3(image))
            crops.append(self.global_transfo3(image))
        else:
            # Weak branch: freshly sampled weak global crops.
            crops.append(self.global_transfo1(image))
            crops.append(self.global_transfo2(image))
            weak_flag = True

        # Local crops: an independent weak/strong decision per crop.
        for _ in range(self.local_crops_number):
            if self.prob < 1.0 and random.random() > self.prob:
                crops.append(self.local_transfo2(image))
            else:
                crops.append(self.local_transfo(image))

        return crops, weak_flag
| |
|
| |
|
def get_dataset(args):
    """
    Build the multi-crop augmentation pipeline and wrap it in a distributed
    DataLoader over an ImageFolder dataset.
    """
    multi_crop_transform = DataAugmentation(
        global_crops_scale=args.global_crops_scale,
        local_crops_scale=args.local_crops_scale,
        local_crops_number=args.local_crops_number,
        vanilla_weak_augmentation=args.vanilla_weak_augmentation,
        prob=args.prob,
        color_aug=args.color_aug,
        local_crop_size=args.size_crops,
        timm_auto_augment_par=args.timm_auto_augment_par,
        strong_ratio=args.strong_ratio,
        re_prob=args.re_prob,
        use_prefetcher=args.use_prefetcher,
    )

    # A tiny 2-class subset in debug mode, full ImageNet-1k otherwise.
    if args.debug:
        class_num = 2
    else:
        class_num = 1000
    dataset = ImageFolder(
        args.data_path, transform=multi_crop_transform, class_num=class_num
    )

    # Shuffled distributed sampler; drop_last keeps batch sizes uniform.
    sampler = torch.utils.data.DistributedSampler(dataset, shuffle=True)
    return torch.utils.data.DataLoader(
        dataset,
        sampler=sampler,
        batch_size=args.batch_size_per_gpu,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True,
    )
| |
|
| |
|
class data_prefetcher:
    """
    Implement a data prefetcher: host-to-device copies and normalization are
    performed on a side CUDA stream so they can overlap with compute
    (some augmentation work is thus done on GPU instead of CPU).
    --loader: a data loader yielding (multi_crops, weak_flag) pairs
    --fp16: whether we use fp16; if yes, tensors are cast to half precision
      before the in-place normalization
    """

    def __init__(self, loader, fp16=True):
        self.loader = iter(loader)
        self.fp16 = fp16
        # Dedicated side stream used for the async copies in preload().
        self.stream = torch.cuda.Stream()
        # ImageNet mean/std, shaped (1, 3, 1, 1) to broadcast over NCHW batches.
        self.mean = torch.tensor([0.485, 0.456, 0.406]).cuda().view(1, 3, 1, 1)
        self.std = torch.tensor([0.229, 0.224, 0.225]).cuda().view(1, 3, 1, 1)
        if fp16:
            # Match the dtype of the half-precision batches so sub_/div_ work.
            self.mean = self.mean.half()
            self.std = self.std.half()

        # Kick off the first transfer immediately so next() has data ready.
        self.preload()

    def preload(self):
        """
        preload the next minibatch of data
        """
        try:
            self.multi_crops, self.weak_flag = next(self.loader)
        except StopIteration:
            # Loader exhausted: signal with None; the caller of next() must
            # check for it.
            self.multi_crops, self.weak_flag = None, None
            return

        # Queue copies + normalization on the side stream (async w.r.t. the
        # current stream; non_blocking relies on pin_memory=True in the loader).
        with torch.cuda.stream(self.stream):
            for i in range(len(self.multi_crops)):
                self.multi_crops[i] = self.multi_crops[i].cuda(non_blocking=True)
                if self.fp16:
                    # Cast then normalize in place: (x - mean) / std.
                    self.multi_crops[i] = (
                        self.multi_crops[i].half().sub_(self.mean).div_(self.std)
                    )
                else:
                    self.multi_crops[i] = (
                        self.multi_crops[i].float().sub_(self.mean).div_(self.std)
                    )

    def next(self):
        """
        load the next minibatch of data
        """
        # Make the work queued on the side stream visible to the current
        # stream before handing the tensors out.
        # NOTE(review): the usual prefetcher pattern also calls
        # tensor.record_stream(current_stream) on the returned tensors to keep
        # their memory alive across streams — confirm whether that is needed here.
        torch.cuda.current_stream().wait_stream(self.stream)
        multi_crops, weak_flags = self.multi_crops, self.weak_flag
        self.preload()
        return multi_crops, weak_flags
| |
|