update

377dccd verified 3 months ago

7.76 kB

	# Copyright 2022-present, Lorenzo Bonicelli, Pietro Buzzega, Matteo Boschini, Angelo Porrello, Simone Calderara.
	# All rights reserved.
	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.

	from argparse import Namespace
	from typing import Tuple

	import numpy as np
	import torch.nn as nn
	import torch.optim
	from torch.utils.data import DataLoader, Dataset


	class ContinualDataset:
	    """
	    Continual learning evaluation setting.

	    Concrete datasets subclass this and define the class-level fields below;
	    every ``get_*`` hook raises NotImplementedError in this base class.
	    """
	    NAME: str
	    SETTING: str
	    N_CLASSES: int
	    N_CLASSES_PER_TASK: int
	    N_TASKS: int

	    # Fields a concrete dataset must define with truthy values.
	    _REQUIRED_FIELDS = ('NAME', 'SETTING', 'N_CLASSES', 'N_CLASSES_PER_TASK', 'N_TASKS')

	    def __init__(self, args: Namespace) -> None:
	        """
	        Initializes the train and test lists of dataloaders.
	        :param args: the arguments which contains the hyperparameters
	                     (``half_data_in_first_task`` is read here)
	        :raises NotImplementedError: if a required class field is missing or falsy
	        :raises ValueError: if ``half_data_in_first_task`` is off and
	                            N_CLASSES // N_TASKS is smaller than 2
	        """
	        self.train_loader = None
	        self.test_loaders = []
	        # Lower bound of the class window; store_masked_loaders selects
	        # targets in [i, i + n_classes) and then advances it.
	        self.i = 0
	        self.args = args

	        # Check the required fields BEFORE using them: the fields are only
	        # annotated on this class, so a subclass that forgets one would
	        # otherwise fail with AttributeError (or ZeroDivisionError/TypeError)
	        # in the arithmetic below instead of this explicit error.
	        if not all(getattr(self, field, None) for field in self._REQUIRED_FIELDS):
	            raise NotImplementedError('The dataset must be initialized with all the required fields.')
	        if not self.args.half_data_in_first_task and self.N_CLASSES // self.N_TASKS < 2:
	            raise ValueError(f"Each task should have at least 2 classes, got N_CLASSES={self.N_CLASSES}, N_TASKS={self.N_TASKS}")
	        if self.args.half_data_in_first_task:
	            # Sets an instance attribute; the class-level N_TASKS is untouched.
	            self.N_TASKS = self.N_TASKS // 2 + 1

	    def get_data_loaders(self) -> Tuple[DataLoader, DataLoader]:
	        """
	        Creates and returns the training and test loaders for the current task.
	        The current training loader and all test loaders are stored in self.
	        :return: the current training and test loaders
	        """
	        raise NotImplementedError

	    @staticmethod
	    def get_backbone() -> nn.Module:
	        """
	        Returns the backbone to be used for the current dataset.
	        """
	        raise NotImplementedError

	    @staticmethod
	    def get_transform() -> nn.Module:
	        """
	        Returns the transform to be used for the current dataset.
	        """
	        raise NotImplementedError

	    @staticmethod
	    def get_loss() -> nn.Module:
	        """
	        Returns the loss to be used for the current dataset.
	        """
	        raise NotImplementedError

	    @staticmethod
	    def get_normalization_transform() -> nn.Module:
	        """
	        Returns the transform used for normalizing the current dataset.
	        """
	        raise NotImplementedError

	    @staticmethod
	    def get_denormalization_transform() -> nn.Module:
	        """
	        Returns the transform used for denormalizing the current dataset.
	        """
	        raise NotImplementedError

	    @staticmethod
	    def get_scheduler(model, args: Namespace) -> torch.optim.lr_scheduler._LRScheduler:
	        """
	        Returns the scheduler to be used for the current dataset.
	        :param model: the model whose optimization is scheduled
	        :param args: the arguments which contains the hyperparameters
	        """
	        raise NotImplementedError

	    @staticmethod
	    def get_epochs():
	        """
	        Not implemented here; presumably returns the number of training
	        epochs for the current dataset.
	        """
	        raise NotImplementedError

	    @staticmethod
	    def get_batch_size():
	        """
	        Not implemented here; presumably returns the batch size for the
	        current dataset.
	        """
	        raise NotImplementedError

	    @staticmethod
	    def get_minibatch_size():
	        """
	        Not implemented here; presumably returns the minibatch size for the
	        current dataset.
	        """
	        raise NotImplementedError

	    def permute_tasks(self, train_dataset, test_dataset) -> None:
	        """
	        Changes the order of classes in the dataset, so with different seed data in each task is different.
	        Both datasets are modified in place: their ``targets`` become lists of remapped labels.
	        :param train_dataset: train dataset, its targets define the set of classes
	        :param test_dataset: test dataset, remapped with the same permutation
	        """
	        classes = np.unique(train_dataset.targets)
	        # Seeded with args.seed so the same seed always yields the same order.
	        new_classes = np.random.RandomState(seed=self.args.seed).permutation(classes)

	        # Each label is used directly as an index into the permuted array
	        # NOTE(review): this presumes labels are the integers 0..K-1 -- confirm.
	        train_dataset.targets = [new_classes[c] for c in train_dataset.targets]
	        test_dataset.targets = [new_classes[c] for c in test_dataset.targets]


	def store_masked_loaders(train_dataset: Dataset, test_dataset: Dataset,
	                         setting: ContinualDataset) -> Tuple[DataLoader, DataLoader]:
	    """
	    Divides the dataset into tasks.
	    Keeps only the samples whose target lies in the current class window
	    [setting.i, setting.i + n_classes), modifying both datasets in place,
	    then records the loaders on ``setting`` and advances ``setting.i``.
	    :param train_dataset: train dataset
	    :param test_dataset: test dataset
	    :param setting: continual learning setting
	    :return: train and test loaders
	    """
	    # The very first task takes half of all classes when the option is set.
	    is_big_first_task = setting.args.half_data_in_first_task and setting.i == 0
	    n_classes = setting.N_CLASSES // 2 if is_big_first_task else setting.N_CLASSES_PER_TASK

	    lower = setting.i
	    upper = setting.i + n_classes
	    train_targets = np.array(train_dataset.targets)
	    test_targets = np.array(test_dataset.targets)
	    train_mask = np.logical_and(train_targets >= lower, train_targets < upper)
	    test_mask = np.logical_and(test_targets >= lower, test_targets < upper)

	    # Data and targets are filtered with the same mask so they stay aligned.
	    train_dataset.data = train_dataset.data[train_mask]
	    test_dataset.data = test_dataset.data[test_mask]
	    train_dataset.targets = train_targets[train_mask]
	    test_dataset.targets = test_targets[test_mask]

	    batch_size = setting.args.batch_size
	    train_loader = DataLoader(train_dataset, batch_size=batch_size,
	                              shuffle=True, num_workers=4)
	    test_loader = DataLoader(test_dataset, batch_size=batch_size,
	                             shuffle=False, num_workers=4)

	    # Test loaders accumulate across tasks; only the latest train loader is kept.
	    setting.test_loaders.append(test_loader)
	    setting.train_loader = train_loader
	    setting.i += n_classes

	    return train_loader, test_loader


	def get_previous_train_loader(train_dataset: Dataset, batch_size: int,
	                              setting: ContinualDataset) -> DataLoader:
	    """
	    Creates a dataloader for the previous task.
	    The selected classes are the N_CLASSES_PER_TASK labels that end right
	    before ``setting.i``; ``train_dataset`` is filtered in place.
	    :param train_dataset: the entire training set
	    :param batch_size: the desired batch size
	    :param setting: the continual dataset at hand
	    :return: a dataloader
	    """
	    labels = np.array(train_dataset.targets)
	    first_class = setting.i - setting.N_CLASSES_PER_TASK
	    end_class = first_class + setting.N_CLASSES_PER_TASK
	    keep = np.logical_and(labels >= first_class, labels < end_class)

	    train_dataset.data = train_dataset.data[keep]
	    train_dataset.targets = labels[keep]

	    return DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

	def get_first_train_loader(train_dataset: Dataset,
	                           setting: ContinualDataset) -> DataLoader:
	    """
	    Creates a training dataloader restricted to the first
	    N_CLASSES_PER_TASK classes, i.e. targets in [0, N_CLASSES_PER_TASK).
	    ``train_dataset`` is filtered in place (both data and targets).
	    :param train_dataset: the entire training set
	    :param setting: the continual dataset at hand
	    :return: a dataloader
	    """
	    labels = np.array(train_dataset.targets)
	    upper = setting.N_CLASSES_PER_TASK
	    keep = np.logical_and(labels >= 0, labels < upper)

	    train_dataset.data = train_dataset.data[keep]
	    train_dataset.targets = labels[keep]

	    return DataLoader(train_dataset, batch_size=setting.args.batch_size,
	                      shuffle=True, num_workers=4)
	def get_first_test_loader(test_dataset: Dataset,
	                          setting: "ContinualDataset") -> DataLoader:
	    """
	    Creates a test dataloader restricted to the N_CLASSES_PER_TASK classes
	    starting at ``setting.i``, i.e. targets in [setting.i, setting.i + N_CLASSES_PER_TASK).
	    ``test_dataset`` is filtered in place (both data and targets).
	    :param test_dataset: the entire test set
	    :param setting: the continual dataset at hand
	    :return: a dataloader
	    """
	    n_classes = setting.N_CLASSES_PER_TASK

	    # NOTE(review): the window starts at setting.i, which is 0 only before any
	    # task has been consumed -- confirm this is intended for a "first" loader.
	    targets = np.array(test_dataset.targets)
	    test_mask = np.logical_and(targets >= setting.i,
	                               targets < setting.i + n_classes)

	    test_dataset.data = test_dataset.data[test_mask]
	    # Filter the targets with the same mask: leaving them untouched would
	    # pair the kept samples with labels from the unfiltered array.
	    test_dataset.targets = targets[test_mask]

	    test_loader = DataLoader(test_dataset,
	                             batch_size=setting.args.batch_size, shuffle=False, num_workers=4)

	    return test_loader