Spaces:

williamphoenix
/

Mithridatium

Running

Mithridatium / scripts /demo_invisible_trigger.py

Gustavo Lucca

Invisible watermark training script and tests updated

4ba0984 3 months ago

7.78 kB

	#!/usr/bin/env python3
	"""Demo / sanity-check for an ``invisible`` (UAP-based) backdoor model.

	This script is intended to be run after training has finished; its purpose is
	to load a checkpoint, visualise the universal adversarial perturbation (UAP),
	and optionally inspect how the model behaves on a small batch of clean and
	triggered images. We do not perform any formal evaluation here (see
	``scripts/train_resnet18.py`` and the CLI for that), but the outputs are handy
	for a quick manual sanity check or for generating figures for documentation.

	Typical usage::

	python -m scripts.demo_invisible_trigger \
	--model models/resnet18_invisible_highxi.pth \
	--uap-path models/uap_highxi.pt \
	--uap-norm inf --uap-xi 0.1 --seed 42 \
	--target-class 0 \
	--num-images 6 \
	--save-dir demo_outputs

	If the file given by ``--uap-path`` already exists on disk it is loaded. When
	it does not exist, or no path is given, a fresh UAP is generated according to
	the given norm, magnitude and seed; the tensor is automatically written to
	``--uap-path`` if a path was supplied. The parameters should match those used
	during training so that the same trigger is used for evaluation.

	The script prints a few model predictions and saves a couple of PNGs. By
	default it operates on the CIFAR-10 test set and only considers samples whose
	original label is not the target class (i.e. candidates for the attack).
	"""

	import argparse
	import os
	import random
	from pathlib import Path

	import matplotlib.pyplot as plt
	import torch
	from torch.utils.data import DataLoader
	from torchvision import datasets, transforms, utils as vutils

	from mithridatium.attacks.invisible import apply_invisible_trigger, create_random_uap
	from mithridatium.loader import load_resnet18

	# Per-channel statistics for input normalisation; per the original author
	# this is the same normalisation used during training/evaluation.
	CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
	CIFAR10_STD = (0.2023, 0.1994, 0.2010)
	# Shared transform applied to image tensors right before they reach the model.
	NORMALISE = transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)


	def parse_args(argv=None):
	    """Parse the command-line options of the demo script.

	    Args:
	        argv: Optional list of argument strings. ``None`` (the default) makes
	            ``argparse`` read ``sys.argv[1:]``, which is what the script
	            entry point relies on; passing a list allows programmatic use.

	    Returns:
	        argparse.Namespace with the attributes ``model``, ``uap_path``,
	        ``uap_norm``, ``uap_xi``, ``seed``, ``target_class``, ``num_images``,
	        ``save_dir``, ``device`` and ``compute_asr``.
	    """
	    parser = argparse.ArgumentParser(description="Visualise an invisible-trigger model")
	    parser.add_argument("--model", type=str, required=True,
	                        help="Path to the trojaned ResNet-18 checkpoint")
	    # The two adjacent literals are concatenated by Python, so the separating
	    # space has to be part of one of them.
	    parser.add_argument("--uap-path", type=str, default=None,
	                        help="File containing the universal perturbation. "
	                             "If missing one will be generated.")
	    parser.add_argument("--uap-norm", choices=["inf", "2"], default="inf",
	                        help="Lp norm used when generating a random UAP")
	    parser.add_argument("--uap-xi", type=float, default=0.05,
	                        help="Magnitude of the UAP when it is randomly generated")
	    parser.add_argument("--seed", type=int, default=0,
	                        help="Random seed for reproducibility (UAP + sample pick)")
	    parser.add_argument("--target-class", type=int, default=0,
	                        help="Class that the UAP is supposed to force predictions to")
	    parser.add_argument("--num-images", type=int, default=8,
	                        help="Number of example images to display")
	    parser.add_argument("--save-dir", type=str, default="demo_outputs",
	                        help="Directory where output figures will be written")
	    parser.add_argument("--device", type=str, default=None,
	                        help="Torch device to run on (cpu/cuda). auto-detected if not set")
	    parser.add_argument("--compute-asr", action="store_true",
	                        help="Compute approximate ASR over entire test set")
	    return parser.parse_args(argv)


	def ensure_uap(args):
	    """Load or generate a UAP tensor.

	    If ``args.uap_path`` is set and the file exists, it is loaded onto the
	    CPU. Otherwise a new perturbation of shape ``(3, 32, 32)`` is created via
	    ``create_random_uap`` from ``args.uap_xi``, ``args.uap_norm`` and
	    ``args.seed``, and written to ``args.uap_path`` when a path is provided
	    (the original author notes this mimics ``train_resnet18`` behaviour).

	    Args:
	        args: Namespace providing ``uap_path``, ``uap_xi``, ``uap_norm`` and
	            ``seed``.

	    Returns:
	        The loaded or freshly generated perturbation.
	    """
	    if args.uap_path and os.path.exists(args.uap_path):
	        # NOTE(security): torch.load unpickles the file; only point
	        # --uap-path at files from a trusted source.
	        # map_location="cpu" lets a UAP saved from a CUDA run load on a
	        # CPU-only host; the callers in this file add it to CPU image batches.
	        uap = torch.load(args.uap_path, map_location="cpu")
	        print(f"Loaded UAP from {args.uap_path}")
	        return uap

	    uap = create_random_uap((3, 32, 32), xi=args.uap_xi, p=args.uap_norm, seed=args.seed)
	    if args.uap_path:
	        # A bare filename has an empty dirname, and os.makedirs("") raises
	        # FileNotFoundError, so only create the parent when there is one.
	        parent = os.path.dirname(args.uap_path)
	        if parent:
	            os.makedirs(parent, exist_ok=True)
	        torch.save(uap, args.uap_path)
	        print(f"Saved generated UAP to {args.uap_path}")
	    return uap


	def load_cifar10_test():
	    """Build the CIFAR-10 test split stored under ``./data``.

	    The data is downloaded when missing and every image is converted with
	    ``transforms.ToTensor()`` (raw values in [0,1], no normalisation).
	    """
	    to_tensor = transforms.ToTensor()
	    return datasets.CIFAR10(
	        root="./data",
	        train=False,
	        transform=to_tensor,
	        download=True,
	    )


	def select_candidates(dataset, target_class, num_samples, seed=None):
	    """Pick ``num_samples`` indices from the test set that are not ``target_class``.

	    Args:
	        dataset: Iterable of ``(sample, label)`` pairs. When it exposes a
	            ``targets`` sequence and has no ``target_transform`` set, the
	            labels are read from ``targets`` directly.
	        target_class: Label to exclude from the candidates.
	        num_samples: Number of indices wanted; capped at the number of
	            available candidates.
	        seed: Seed for the private ``random.Random`` used for sampling.

	    Returns:
	        List of selected dataset indices (without repetition).
	    """
	    rng = random.Random(seed)
	    labels = getattr(dataset, "targets", None)
	    if labels is not None and getattr(dataset, "target_transform", None) is None:
	        # Fast path: iterating the dataset itself would load and transform
	        # every sample only to throw the image away. Without a
	        # target_transform the stored targets are the labels iteration yields.
	        indices = [i for i, y in enumerate(labels) if y != target_class]
	    else:
	        indices = [i for i, (_, y) in enumerate(dataset) if y != target_class]
	    return rng.sample(indices, min(num_samples, len(indices)))


	def make_grid_image(tensors, nrow):
	    """Tile a batch of images into one grid and return it as a numpy array.

	    The grid is built by ``vutils.make_grid`` with ``nrow`` images per row
	    and a padding value of 1.0, then rearranged so the channel axis comes
	    last, which is the layout matplotlib expects.
	    """
	    tiled = vutils.make_grid(tensors, nrow=nrow, pad_value=1.0)
	    channels_last = tiled.permute(1, 2, 0)
	    on_host = channels_last.cpu()
	    return on_host.numpy()


	def compute_asr(model, dataset, uap, target_class, device):
	    """Return the attack success rate (in percent) of ``uap`` on ``dataset``.

	    Samples labelled ``target_class`` are dropped (the original author notes
	    this mirrors ``InvisibleBackdoorDataset`` behaviour). The remaining images
	    get the trigger applied, are normalised and classified; the share that is
	    predicted as ``target_class`` is reported. ``0.0`` is returned when no
	    sample qualifies. The model is moved to ``device`` and put in eval mode.
	    """
	    batches = DataLoader(dataset, batch_size=256, shuffle=False)
	    model.to(device)
	    model.eval()

	    hits, seen = 0, 0
	    with torch.no_grad():
	        for images, labels in batches:
	            keep = labels != target_class
	            if not keep.any():
	                # batch consists solely of target-class samples
	                continue
	            poisoned = apply_invisible_trigger(images[keep], uap)
	            logits = model(NORMALISE(poisoned).to(device))
	            predicted = logits.argmax(1).cpu()
	            hits += (predicted == target_class).sum().item()
	            seen += len(predicted)

	    if seen == 0:
	        return 0.0
	    return 100.0 * hits / seen


	def main():
	    """Run the demo: load model and UAP, show predictions, save figures.

	    Steps:
	      1. Parse the CLI options and pick the device (``--device`` or
	         auto-detected CUDA/CPU).
	      2. Load the checkpoint with ``load_resnet18`` and obtain the UAP via
	         ``ensure_uap``.
	      3. Select ``--num-images`` non-target test samples, apply the trigger
	         and print clean vs. triggered predictions.
	      4. Write ``uap.png``, ``clean_examples.png`` and
	         ``triggered_examples.png`` into ``--save-dir``.
	      5. With ``--compute-asr``, print the attack success rate over the whole
	         test set.

	    When no candidate sample is selected (e.g. ``--num-images 0``) the example
	    predictions and example grids are skipped instead of crashing.
	    """
	    args = parse_args()

	    device = args.device
	    if device is None:
	        device = "cuda" if torch.cuda.is_available() else "cpu"
	    print(f"using device {device}")

	    # load model and perturbation
	    model, _ = load_resnet18(args.model)
	    model.to(device)
	    # Inference only: make sure layers such as batch-norm/dropout are in
	    # evaluation mode, exactly as compute_asr does before predicting.
	    model.eval()

	    uap = ensure_uap(args)

	    # prepare dataset
	    test_ds = load_cifar10_test()
	    indices = select_candidates(test_ds, args.target_class, args.num_images, seed=args.seed)

	    raw_samples = None
	    triggered = None
	    if indices:
	        raw_samples = torch.stack([test_ds[i][0] for i in indices])
	        triggered = apply_invisible_trigger(raw_samples, uap)

	        # print predictions for the small batch
	        with torch.no_grad():
	            clean_inp = NORMALISE(raw_samples.to(device))
	            trig_inp = NORMALISE(triggered.to(device))
	            clean_preds = model(clean_inp).argmax(1).cpu().tolist()
	            trig_preds = model(trig_inp).argmax(1).cpu().tolist()
	        print(f"chosen sample indices: {indices}")
	        print(f"clean predictions: {clean_preds}")
	        print(f"triggered predictions: {trig_preds}")
	    else:
	        # torch.stack([]) raises, so there is nothing to predict or plot.
	        print("no candidate samples selected; skipping example predictions")

	    # visualise
	    os.makedirs(args.save_dir, exist_ok=True)
	    # NOTE(review): clamping to [0, 1] drops the negative part of the
	    # perturbation in the saved picture; kept as in the original output.
	    plt.imsave(os.path.join(args.save_dir, "uap.png"),
	               make_grid_image(uap.clamp(0, 1).unsqueeze(0), nrow=1))

	    if raw_samples is not None:
	        plt.imsave(os.path.join(args.save_dir, "clean_examples.png"),
	                   make_grid_image(raw_samples, nrow=len(raw_samples)))
	        plt.imsave(os.path.join(args.save_dir, "triggered_examples.png"),
	                   make_grid_image(triggered, nrow=len(triggered)))
	    print(f"saved grids to {args.save_dir}")

	    if args.compute_asr:
	        asr = compute_asr(model, test_ds, uap, args.target_class, device)
	        print(f"estimated ASR on test set: {asr:.2f}%")


	# Script entry point: run the demo only when executed directly (for example
	# via ``python -m scripts.demo_invisible_trigger``), never on import.
	if __name__ == "__main__":
	    main()