SAE / attacks /CleanSheet /tests /test_data /test_dataset /test_cifar.py

Upload 192 files

998bb30 verified about 2 months ago

9.44 kB

	import numpy as np
	import pytest

	from .utils import FakeDataset, build_fake_dataset

	CIFAR_TESTSET_NUM = 1000


	def build_cifar_fake_dataset(dataset_type: str, **kwargs) -> FakeDataset:
	if dataset_type.endswith('CIFAR100'):
	y_range = (0, 99)
	dataset_type = dataset_type.replace('CIFAR100', 'FakeDataset')
	else:
	y_range = (0, 9)
	dataset_type = dataset_type.replace('CIFAR10', 'FakeDataset')

	dataset_cfg = dict(type=dataset_type,
	x_shape=(3, 32, 32),
	y_range=y_range,
	nums=CIFAR_TESTSET_NUM,
	**kwargs)

	return build_fake_dataset(dataset_cfg)


	@pytest.mark.parametrize('dataset_type', ['CIFAR10', 'CIFAR100'])
	def test_xy(dataset_type: str) -> None:
	cifar = build_cifar_fake_dataset(dataset_type)

	xy = cifar.get_xy()
	x, y = xy
	assert len(x) == len(y)
	assert isinstance(y[0], int)

	old_x = x.copy()
	old_y = y.copy()

	cifar.set_xy(xy)
	assert all([np.array_equal(nx, ox) for nx, ox in zip(cifar.data, old_x)])
	assert cifar.targets == old_y

	x = x[:cifar.num_classes]
	y = y[:cifar.num_classes]
	cifar.set_xy((x, y))
	assert all([np.array_equal(nx, ox) for nx, ox in zip(cifar.data, x)])
	assert cifar.targets == y
	assert cifar.num_classes == len(set(y))
	assert len(cifar.data.shape) == 4


	@pytest.mark.parametrize(['start_idx', 'end_idx', 'dataset_type'],
	[(0, 9, 'IndexCIFAR10'), (-10, 8, 'IndexCIFAR10'),
	(2, 12, 'IndexCIFAR10'), (4, 4, 'IndexCIFAR10'),
	(4, 3, 'IndexCIFAR10'), (0, 99, 'IndexCIFAR100'),
	(-10, 8, 'IndexCIFAR100'),
	(40, 50, 'IndexCIFAR100')])
	def test_index(start_idx: int, end_idx: int, dataset_type: str) -> None:
	kwargs = dict(start_idx=start_idx, end_idx=end_idx)

	if start_idx > end_idx:
	with pytest.raises(ValueError):
	_ = build_cifar_fake_dataset(dataset_type, **kwargs)
	return
	cifar = build_cifar_fake_dataset(dataset_type, **kwargs)
	assert cifar.start_idx == start_idx
	assert cifar.end_idx == end_idx

	for y in cifar.targets:
	assert start_idx <= y <= end_idx

	assert cifar.num_classes == min(
	cifar.end_idx, cifar.raw_num_classes - 1) - max(cifar.start_idx, 0) + 1
	assert len(cifar.data.shape) == 4


	@pytest.mark.parametrize(['ratio', 'dataset_type'], [(-1, 'RatioCIFAR10'),
	(0, 'RatioCIFAR10'),
	(0.1, 'RatioCIFAR10'),
	(0.5, 'RatioCIFAR10'),
	(1, 'RatioCIFAR10'),
	(2, 'RatioCIFAR10'),
	(0.4, 'RatioCIFAR100')])
	def test_ratio(ratio: float, dataset_type: str) -> None:
	kwargs = dict(ratio=ratio)

	if ratio <= 0 or ratio > 1:
	with pytest.raises(ValueError):
	_ = build_cifar_fake_dataset(dataset_type, **kwargs)
	return
	cifar = build_cifar_fake_dataset(dataset_type, **kwargs)

	assert cifar.num_classes == cifar.raw_num_classes
	assert len(cifar.targets) == \
	int(CIFAR_TESTSET_NUM / cifar.num_classes * ratio) * cifar.num_classes
	assert len(cifar.data.shape) == 4


	@pytest.mark.parametrize('range_ratio', [(-1, 0.2), (0, 2), (0.1, 0.1),
	(0.5, 0.2), (0.1, 0.5), (0, 1)])
	@pytest.mark.parametrize('dataset_type',
	['RangeRatioCIFAR10', 'RangeRatioCIFAR100'])
	def test_range_ratio(range_ratio: tuple[float, float],
	dataset_type: str) -> None:
	kwargs = dict(range_ratio=range_ratio)

	start_ratio = range_ratio[0]
	end_ratio = range_ratio[1]
	if not (0 <= start_ratio < end_ratio <= 1):
	with pytest.raises(ValueError):
	_ = build_cifar_fake_dataset(dataset_type, **kwargs)
	return

	cifar = build_cifar_fake_dataset(dataset_type, **kwargs)
	assert cifar.num_classes == cifar.raw_num_classes
	assert len(cifar.targets) == \
	round(CIFAR_TESTSET_NUM * (end_ratio - start_ratio))
	assert len(cifar.data.shape) == 4


	@pytest.mark.parametrize(['range_ratio1', 'range_ratio2'],
	[((0, 0.5), (0.5, 1)), ((0, 0.6), (0.4, 1)),
	((0, 0.7), (0.3, 1)), ((0, 0.5), (0, 1))])
	@pytest.mark.parametrize('dataset_type',
	['RangeRatioCIFAR10', 'RangeRatioCIFAR100'])
	def test_range_ratio_intersection(range_ratio1: tuple[float, float],
	range_ratio2: tuple[float, float],
	dataset_type: str) -> None:

	cifar1 = build_cifar_fake_dataset(dataset_type=dataset_type,
	range_ratio=range_ratio1,
	cache_xy=True)
	cifar2 = build_cifar_fake_dataset(dataset_type=dataset_type,
	range_ratio=range_ratio2,
	cache_xy=True)

	cat_x = np.concatenate([cifar1.data, cifar2.data], axis=0)
	unique_x = np.unique(cat_x, axis=0)
	intersection_number = cat_x.shape[0] - unique_x.shape[0]

	intersection_ratio = max(0, range_ratio1[1] - range_ratio2[0])
	assert round(intersection_ratio * CIFAR_TESTSET_NUM) == intersection_number


	@pytest.mark.parametrize('dataset_type',
	['IndexRatioCIFAR10', 'IndexRatioCIFAR100'])
	@pytest.mark.parametrize(['start_idx', 'end_idx', 'ratio'], [(4, 3, 0.5),
	(3, 4, 0),
	(3, 4, 2),
	(1, 4, 0.1)])
	def test_index_ratio(start_idx: int, end_idx: int, ratio: float,
	dataset_type: str) -> None:
	kwargs = dict(start_idx=start_idx, end_idx=end_idx, ratio=ratio)

	if ratio <= 0 or ratio > 1 or start_idx > end_idx:
	with pytest.raises(ValueError):
	_ = build_cifar_fake_dataset(dataset_type, **kwargs)
	return
	cifar = build_cifar_fake_dataset(dataset_type, **kwargs)
	assert cifar.start_idx == start_idx
	assert cifar.end_idx == end_idx

	for y in cifar.targets:
	assert start_idx <= y <= end_idx
	assert len(cifar.targets) == \
	cifar.num_classes / cifar.raw_num_classes * CIFAR_TESTSET_NUM * ratio
	assert len(cifar.data.shape) == 4


	@pytest.mark.parametrize('dataset_type',
	['PoisonLabelCIFAR10', 'PoisonLabelCIFAR100'])
	@pytest.mark.parametrize('poison_label', [-1, 5, 101])
	def test_poison_label(poison_label: int, dataset_type: str) -> None:
	kwargs = dict(poison_label=poison_label)

	if poison_label < 0 or poison_label >= 100:
	with pytest.raises(ValueError):
	_ = build_cifar_fake_dataset(dataset_type, **kwargs)
	return
	cifar = build_cifar_fake_dataset(dataset_type, **kwargs)
	assert cifar.poison_label == poison_label

	assert cifar.num_classes == 1
	assert all(map(lambda x: x == poison_label, cifar.targets))
	assert len(cifar.data.shape) == 4


	@pytest.mark.parametrize(
	'dataset_type', ['RatioPoisonLabelCIFAR10', 'RatioPoisonLabelCIFAR100'])
	@pytest.mark.parametrize('poison_label', [-1, 5, 101])
	@pytest.mark.parametrize('ratio', [0, 0.2, 1, 1.2])
	def test_ratio_poison_label(ratio: float, poison_label: int,
	dataset_type: str) -> None:
	kwargs = dict(ratio=ratio, poison_label=poison_label)

	if (poison_label < 0 or poison_label >= 100) or \
	(ratio <= 0 or ratio > 1):
	with pytest.raises(ValueError):
	_ = build_cifar_fake_dataset(dataset_type, **kwargs)
	return
	cifar = build_cifar_fake_dataset(dataset_type, **kwargs)
	assert cifar.poison_label == poison_label

	assert len(cifar) == round(CIFAR_TESTSET_NUM * ratio)
	assert cifar.num_classes == 1
	assert all(map(lambda x: x == poison_label, cifar.targets))
	assert len(cifar.data.shape) == 4


	@pytest.mark.parametrize(
	'dataset_type',
	['RangeRatioPoisonLabelCIFAR10', 'RangeRatioPoisonLabelCIFAR100'])
	@pytest.mark.parametrize('poison_label', [-1, 1, 101])
	@pytest.mark.parametrize('range_ratio', [(0, 0.2), (0.2, 0.5), (0.5, 1),
	(0.5, 0.2)])
	def test_range_ratio_poison_label(range_ratio: tuple[float,
	float], poison_label: int,
	dataset_type: str) -> None:
	kwargs = dict(range_ratio=range_ratio, poison_label=poison_label)

	if poison_label < 0 or poison_label >= 100 or \
	not (0 <= range_ratio[0] < range_ratio[1] <= 1):
	with pytest.raises(ValueError):
	_ = build_cifar_fake_dataset(dataset_type, **kwargs)
	return
	cifar = build_cifar_fake_dataset(dataset_type, **kwargs)
	assert cifar.poison_label == poison_label

	assert len(cifar) == round(CIFAR_TESTSET_NUM *
	(range_ratio[1] - range_ratio[0]))
	assert cifar.num_classes == 1
	assert all(map(lambda x: x == poison_label, cifar.targets))
	assert len(cifar.data.shape) == 4