|
|
""" |
|
|
Adversarial Examples on Graph Data: Deep Insights into Attack and Defense |
|
|
https://arxiv.org/pdf/1903.01610.pdf |
|
|
""" |
|
|
|
|
|
import torch |
|
|
import torch.multiprocessing as mp |
|
|
from deeprobust.graph.targeted_attack import BaseAttack |
|
|
from torch.nn.parameter import Parameter |
|
|
from deeprobust.graph import utils |
|
|
import torch.nn.functional as F |
|
|
import numpy as np |
|
|
import scipy.sparse as sp |
|
|
|
|
|
from torch import optim |
|
|
from torch.nn import functional as F |
|
|
from torch.nn.modules.module import Module |
|
|
import numpy as np |
|
|
from tqdm import tqdm |
|
|
import math |
|
|
import scipy.sparse as sp |
|
|
|
|
|
class IGAttack(BaseAttack):
    """IGAttack: IG-FGSM. Adversarial Examples on Graph Data: Deep Insights into Attack and Defense, https://arxiv.org/pdf/1903.01610.pdf.

    Greedily flips the edge or feature entry of the target node with the
    largest integrated-gradient importance score, one perturbation at a time.

    Parameters
    ----------
    model :
        model to attack
    nnodes : int
        number of nodes in the input graph
    feature_shape : tuple
        shape of the input node features
    attack_structure : bool
        whether to attack graph structure
    attack_features : bool
        whether to attack node features
    device: str
        'cpu' or 'cuda'

    Examples
    --------

    >>> from deeprobust.graph.data import Dataset
    >>> from deeprobust.graph.defense import GCN
    >>> from deeprobust.graph.targeted_attack import IGAttack
    >>> data = Dataset(root='/tmp/', name='cora')
    >>> adj, features, labels = data.adj, data.features, data.labels
    >>> idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    >>> # Setup Surrogate model
    >>> surrogate = GCN(nfeat=features.shape[1], nclass=labels.max().item()+1,
                    nhid=16, dropout=0, with_relu=False, with_bias=False, device='cpu').to('cpu')
    >>> surrogate.fit(features, adj, labels, idx_train, idx_val, patience=30)
    >>> # Setup Attack Model
    >>> target_node = 0
    >>> model = IGAttack(surrogate, nnodes=adj.shape[0], attack_structure=True, attack_features=True, device='cpu').to('cpu')
    >>> # Attack
    >>> model.attack(features, adj, labels, idx_train, target_node, n_perturbations=5, steps=10)
    >>> modified_adj = model.modified_adj
    >>> modified_features = model.modified_features

    """

    def __init__(self, model, nnodes=None, feature_shape=None, attack_structure=True, attack_features=True, device='cpu'):

        super(IGAttack, self).__init__(model, nnodes, attack_structure, attack_features, device)

        assert attack_features or attack_structure, 'attack_features or attack_structure cannot be both False'

        # Results populated by attack(): perturbed adjacency/features (scipy
        # csr matrices) and the index of the node that was attacked.
        self.modified_adj = None
        self.modified_features = None
        self.target_node = None

    def attack(self, ori_features, ori_adj, labels, idx_train, target_node, n_perturbations, steps=10, **kwargs):
        """Generate perturbations on the input graph.

        Parameters
        ----------
        ori_features :
            Original (unperturbed) node feature matrix (scipy sparse;
            `.todense()` is called on it)
        ori_adj :
            Original (unperturbed) adjacency matrix (scipy sparse)
        labels :
            node labels
        idx_train:
            training nodes indices
        target_node : int
            target node index to be attacked
        n_perturbations : int
            Number of perturbations on the input graph. Perturbations could
            be edge removals/additions or feature removals/additions.
        steps : int
            steps for computing integrated gradients

        Raises
        ------
        Exception
            When the greedy choice falls on a perturbation type
            (structure/features) that this instance was configured not to
            attack; see https://github.com/DSE-MSU/DeepRobust/issues/42.
        """

        self.surrogate.eval()
        self.target_node = target_node

        modified_adj = ori_adj.todense()
        modified_features = ori_features.todense()
        adj, features, labels = utils.to_tensor(modified_adj, modified_features, labels, device=self.device)
        adj_norm = utils.normalize_adj_tensor(adj)

        # Self-training: use the surrogate's predictions as labels everywhere,
        # but keep the ground-truth labels on the training nodes.
        pseudo_labels = self.surrogate.predict().detach().argmax(1)
        pseudo_labels[idx_train] = labels[idx_train]
        self.pseudo_labels = pseudo_labels

        # Importance scores: s_e over candidate edges of the target node,
        # s_f over its candidate features. A score stays 0 when that
        # perturbation type is disabled.
        s_e = np.zeros(adj.shape[1])
        s_f = np.zeros(features.shape[1])
        if self.attack_structure:
            s_e = self.calc_importance_edge(features, adj_norm, labels, steps)
        if self.attack_features:
            s_f = self.calc_importance_feature(features, adj_norm, labels, steps)

        for _ in range(n_perturbations):
            s_e_max = np.argmax(s_e)
            s_f_max = np.argmax(s_f)

            # Greedily apply whichever single flip (edge vs. feature) has the
            # higher importance score, then zero its score so it is not reused.
            if s_e[s_e_max] >= s_f[s_f_max]:

                if self.attack_structure:
                    # Flip the binary edge (target_node, s_e_max), keeping the
                    # adjacency symmetric.
                    value = np.abs(1 - modified_adj[target_node, s_e_max])
                    modified_adj[target_node, s_e_max] = value
                    modified_adj[s_e_max, target_node] = value
                    s_e[s_e_max] = 0
                else:
                    raise Exception("""No possible perturbation on the structure can be made!
                            See https://github.com/DSE-MSU/DeepRobust/issues/42 for more details.""")
            else:

                if self.attack_features:
                    # Flip the binary feature entry of the target node.
                    modified_features[target_node, s_f_max] = np.abs(1 - modified_features[target_node, s_f_max])
                    s_f[s_f_max] = 0
                else:
                    raise Exception("""No possible perturbation on the features can be made!
                            See https://github.com/DSE-MSU/DeepRobust/issues/42 for more details.""")

        self.modified_adj = sp.csr_matrix(modified_adj)
        self.modified_features = sp.csr_matrix(modified_features)
        self.check_adj(modified_adj)

    def calc_importance_edge(self, features, adj_norm, labels, steps):
        """Calculate integrated gradient for edges. Although I think the the gradient should be
        with respect to adj instead of adj_norm, but the calculation is too time-consuming. So I
        finally decided to calculate the gradient of loss with respect to adj_norm

        Returns a numpy array of one importance score per node: how much
        flipping the edge (target_node, j) is expected to increase the loss.
        """
        # Baselines for integrated gradients: the target node's row fully
        # connected (for edge additions) and fully disconnected (for removals).
        baseline_add = adj_norm.clone()
        baseline_remove = adj_norm.clone()
        baseline_add.data[self.target_node] = 1
        baseline_remove.data[self.target_node] = 0
        adj_norm.requires_grad = True
        integrated_grad_list = []

        i = self.target_node
        for j in tqdm(range(adj_norm.shape[1])):
            # Interpolate from the appropriate baseline towards the current
            # adjacency, depending on whether edge (i, j) currently exists.
            if adj_norm[i][j]:
                scaled_inputs = [baseline_remove + (float(k)/ steps) * (adj_norm - baseline_remove) for k in range(0, steps + 1)]
            else:
                scaled_inputs = [baseline_add - (float(k)/ steps) * (baseline_add - adj_norm) for k in range(0, steps + 1)]
            _sum = 0

            for new_adj in scaled_inputs:
                output = self.surrogate(features, new_adj)
                loss = F.nll_loss(output[[self.target_node]],
                        self.pseudo_labels[[self.target_node]])
                adj_grad = torch.autograd.grad(loss, adj_norm)[0]
                adj_grad = adj_grad[i][j]
                _sum += adj_grad

            # NOTE(review): _sum is a 0-dim tensor, so .mean() is a no-op and
            # the IG average over the steps+1 samples (division by steps+1) is
            # omitted. The missing constant factor is identical for s_e and
            # s_f, so the greedy comparison in attack() is unaffected — but
            # the absolute scale of the returned scores is not the true IG.
            if adj_norm[i][j]:
                avg_grad = (adj_norm[i][j] - 0) * _sum.mean()
            else:
                avg_grad = (1 - adj_norm[i][j]) * _sum.mean()

            integrated_grad_list.append(avg_grad.detach().item())

        # Never propose the self-loop (i, i).
        integrated_grad_list[i] = 0

        integrated_grad_list = np.array(integrated_grad_list)
        # Negate scores of existing edges so that, for both additions and
        # removals, a larger score means a more damaging flip.
        adj = (adj_norm > 0).cpu().numpy()
        integrated_grad_list = (-2 * adj[self.target_node] + 1) * integrated_grad_list
        integrated_grad_list[self.target_node] = -10
        return integrated_grad_list

    def calc_importance_feature(self, features, adj_norm, labels, steps):
        """Calculate integrated gradient for features

        Returns a numpy array of one importance score per feature dimension:
        how much flipping that feature of the target node is expected to
        increase the loss.
        """
        # Baselines: the target node's feature row all-ones (for additions)
        # and all-zeros (for removals).
        baseline_add = features.clone()
        baseline_remove = features.clone()
        baseline_add.data[self.target_node] = 1
        baseline_remove.data[self.target_node] = 0

        features.requires_grad = True
        integrated_grad_list = []
        i = self.target_node
        for j in tqdm(range(features.shape[1])):
            # Interpolate from the appropriate baseline towards the current
            # features, depending on whether feature (i, j) is currently set.
            if features[i][j]:
                scaled_inputs = [baseline_add + (float(k)/ steps) * (features - baseline_add) for k in range(0, steps + 1)]
            else:
                scaled_inputs = [baseline_remove - (float(k)/ steps) * (baseline_remove - features) for k in range(0, steps + 1)]
            _sum = 0

            for new_features in scaled_inputs:
                output = self.surrogate(new_features, adj_norm)
                loss = F.nll_loss(output[[self.target_node]],
                        self.pseudo_labels[[self.target_node]])

                feature_grad = torch.autograd.grad(loss, features)[0]
                feature_grad = feature_grad[i][j]
                _sum += feature_grad

            # NOTE(review): as in calc_importance_edge, _sum.mean() is a no-op
            # on a 0-dim tensor and the division by steps+1 is omitted; the
            # constant factor cancels in attack()'s s_e vs s_f comparison.
            if features[i][j]:
                avg_grad = (features[i][j] - 0) * _sum.mean()
            else:
                avg_grad = (1 - features[i][j]) * _sum.mean()
            integrated_grad_list.append(avg_grad.detach().item())

        features = (features > 0).cpu().numpy()
        integrated_grad_list = np.array(integrated_grad_list)
        # Negate scores of set features so a larger score always means a more
        # damaging flip, whether it is an addition or a removal.
        integrated_grad_list = (-2 * features[self.target_node] + 1) * integrated_grad_list
        return integrated_grad_list
|
|
|
|
|
|