| | import torch.nn as nn |
| | import torch.nn.functional as F |
| | import math |
| | import torch |
| | from torch.nn.parameter import Parameter |
| | from torch.nn.modules.module import Module |
| | from deeprobust.graph import utils |
| | from deeprobust.graph.defense import GCN |
| | from sklearn.utils.extmath import randomized_svd |
| | from tqdm import tqdm |
| | import scipy.sparse as sp |
| | import numpy as np |
| | from numba import njit |
| |
|
| | class GCNSVD(GCN): |
| | """GCNSVD is a 2 Layer Graph Convolutional Network with Truncated SVD as |
| | preprocessing. See more details in All You Need Is Low (Rank): Defending |
| | Against Adversarial Attacks on Graphs, |
| | https://dl.acm.org/doi/abs/10.1145/3336191.3371789. |
| | |
| | Parameters |
| | ---------- |
| | nfeat : int |
| | size of input feature dimension |
| | nhid : int |
| | number of hidden units |
| | nclass : int |
| | size of output dimension |
| | dropout : float |
| | dropout rate for GCN |
| | lr : float |
| | learning rate for GCN |
| | weight_decay : float |
| | weight decay coefficient (l2 normalization) for GCN. When `with_relu` is True, `weight_decay` will be set to 0. |
| | with_relu : bool |
| | whether to use relu activation function. If False, GCN will be linearized. |
| | with_bias: bool |
| | whether to include bias term in GCN weights. |
| | device: str |
| | 'cpu' or 'cuda'. |
| | |
| | Examples |
| | -------- |
| | We can first load dataset and then train GCNSVD. |
| | |
| | >>> from deeprobust.graph.data import PrePtbDataset, Dataset |
| | >>> from deeprobust.graph.defense import GCNSVD |
| | >>> # load clean graph data |
| | >>> data = Dataset(root='/tmp/', name='cora', seed=15) |
| | >>> adj, features, labels = data.adj, data.features, data.labels |
| | >>> idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test |
| | >>> # load perturbed graph data |
| | >>> perturbed_data = PrePtbDataset(root='/tmp/', name='cora') |
| | >>> perturbed_adj = perturbed_data.adj |
| | >>> # train defense model |
| | >>> model = GCNSVD(nfeat=features.shape[1], |
| | nhid=16, |
| | nclass=labels.max().item() + 1, |
| | dropout=0.5, device='cpu').to('cpu') |
| | >>> model.fit(features, perturbed_adj, labels, idx_train, idx_val, k=20) |
| | |
| | """ |
| |
|
| | def __init__(self, nfeat, nhid, nclass, dropout=0.5, lr=0.01, weight_decay=5e-4, with_relu=True, with_bias=True, device='cpu'): |
| |
|
| | super(GCNSVD, self).__init__(nfeat, nhid, nclass, dropout, lr, weight_decay, with_relu, with_bias, device=device) |
| | self.device = device |
| | self.k = None |
| |
|
| | def fit(self, features, adj, labels, idx_train, idx_val=None, k=50, train_iters=200, initialize=True, verbose=True, **kwargs): |
| | """First perform rank-k approximation of adjacency matrix via |
| | truncated SVD, and then train the gcn model on the processed graph, |
| | when idx_val is not None, pick the best model according to |
| | the validation loss. |
| | |
| | Parameters |
| | ---------- |
| | features : |
| | node features |
| | adj : |
| | the adjacency matrix. The format could be torch.tensor or scipy matrix |
| | labels : |
| | node labels |
| | idx_train : |
| | node training indices |
| | idx_val : |
| | node validation indices. If not given (None), GCN training process will not adpot early stopping |
| | k : int |
| | number of singular values and vectors to compute. |
| | train_iters : int |
| | number of training epochs |
| | initialize : bool |
| | whether to initialize parameters before training |
| | verbose : bool |
| | whether to show verbose logs |
| | """ |
| | adj = adj.to('cpu') |
| | modified_adj = self.truncatedSVD(adj, k=k) |
| | self.k = k |
| | |
| | features, modified_adj, labels = utils.to_tensor(features, modified_adj, labels, device=self.device) |
| |
|
| | self.modified_adj = modified_adj |
| | self.features = features |
| | self.labels = labels |
| | super().fit(features, modified_adj, labels, idx_train, idx_val, train_iters=train_iters, initialize=initialize, verbose=verbose) |
| |
|
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| |
|
| | def truncatedSVD(self, data, k=50): |
| | """Fast truncated SVD using randomized algorithm""" |
| | print(f'=== Fast SVD: rank={k} ===') |
| |
|
| | |
| | if isinstance(data, torch.Tensor): |
| | data = data.detach().cpu().numpy() |
| |
|
| | U, S, Vt = randomized_svd(data, n_components=k, n_iter=10, random_state=None) |
| | print("rank_after =", np.count_nonzero(S)) |
| | diag_S = np.diag(S) |
| | return U @ diag_S @ Vt |
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| |
|
| |
|
| | def predict(self, features=None, adj=None): |
| | """By default, the inputs should be unnormalized adjacency |
| | |
| | Parameters |
| | ---------- |
| | features : |
| | node features. If `features` and `adj` are not given, this function will use previous stored `features` and `adj` from training to make predictions. |
| | adj : |
| | adjcency matrix. If `features` and `adj` are not given, this function will use previous stored `features` and `adj` from training to make predictions. |
| | |
| | |
| | Returns |
| | ------- |
| | torch.FloatTensor |
| | output (log probabilities) of GCNSVD |
| | """ |
| |
|
| | self.eval() |
| | if features is None and adj is None: |
| | return self.forward(self.features, self.adj_norm) |
| | else: |
| | adj = self.truncatedSVD(adj, k=self.k) |
| | if type(adj) is not torch.Tensor: |
| | features, adj = utils.to_tensor(features, adj, device=self.device) |
| |
|
| | self.features = features |
| | if utils.is_sparse_tensor(adj): |
| | self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True) |
| | else: |
| | self.adj_norm = utils.normalize_adj_tensor(adj) |
| | return self.forward(self.features, self.adj_norm) |
| |
|
| |
|
| | class GCNJaccard(GCN): |
| | """GCNJaccard first preprocesses input graph via droppining dissimilar |
| | edges and train a GCN based on the processed graph. See more details in |
| | Adversarial Examples on Graph Data: Deep Insights into Attack and Defense, |
| | https://arxiv.org/pdf/1903.01610.pdf. |
| | |
| | Parameters |
| | ---------- |
| | nfeat : int |
| | size of input feature dimension |
| | nhid : int |
| | number of hidden units |
| | nclass : int |
| | size of output dimension |
| | dropout : float |
| | dropout rate for GCN |
| | lr : float |
| | learning rate for GCN |
| | weight_decay : float |
| | weight decay coefficient (l2 normalization) for GCN. When `with_relu` is True, `weight_decay` will be set to 0. |
| | with_relu : bool |
| | whether to use relu activation function. If False, GCN will be linearized. |
| | with_bias: bool |
| | whether to include bias term in GCN weights. |
| | device: str |
| | 'cpu' or 'cuda'. |
| | |
| | Examples |
| | -------- |
| | We can first load dataset and then train GCNJaccard. |
| | |
| | >>> from deeprobust.graph.data import PrePtbDataset, Dataset |
| | >>> from deeprobust.graph.defense import GCNJaccard |
| | >>> # load clean graph data |
| | >>> data = Dataset(root='/tmp/', name='cora', seed=15) |
| | >>> adj, features, labels = data.adj, data.features, data.labels |
| | >>> idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test |
| | >>> # load perturbed graph data |
| | >>> perturbed_data = PrePtbDataset(root='/tmp/', name='cora') |
| | >>> perturbed_adj = perturbed_data.adj |
| | >>> # train defense model |
| | >>> model = GCNJaccard(nfeat=features.shape[1], |
| | nhid=16, |
| | nclass=labels.max().item() + 1, |
| | dropout=0.5, device='cpu').to('cpu') |
| | >>> model.fit(features, perturbed_adj, labels, idx_train, idx_val, threshold=0.03) |
| | |
| | """ |
| | def __init__(self, nfeat, nhid, nclass, binary_feature=True, dropout=0.5, lr=0.01, weight_decay=5e-4, with_relu=True, with_bias=True, device='cpu'): |
| |
|
| | super(GCNJaccard, self).__init__(nfeat, nhid, nclass, dropout, lr, weight_decay, with_relu, with_bias, device=device) |
| | self.device = device |
| | self.binary_feature = binary_feature |
| |
|
| | def fit(self, features, adj, labels, idx_train, idx_val=None, threshold=0.01, train_iters=200, initialize=True, verbose=True, **kwargs): |
| | """First drop dissimilar edges with similarity smaller than given |
| | threshold and then train the gcn model on the processed graph. |
| | When idx_val is not None, pick the best model according to the |
| | validation loss. |
| | |
| | Parameters |
| | ---------- |
| | features : |
| | node features. The format can be numpy.array or scipy matrix |
| | adj : |
| | the adjacency matrix. |
| | labels : |
| | node labels |
| | idx_train : |
| | node training indices |
| | idx_val : |
| | node validation indices. If not given (None), GCN training process will not adpot early stopping |
| | threshold : float |
| | similarity threshold for dropping edges. If two connected nodes with similarity smaller than threshold, the edge between them will be removed. |
| | train_iters : int |
| | number of training epochs |
| | initialize : bool |
| | whether to initialize parameters before training |
| | verbose : bool |
| | whether to show verbose logs |
| | """ |
| |
|
| | self.threshold = threshold |
| | modified_adj = self.drop_dissimilar_edges(features, adj) |
| | |
| | features, modified_adj, labels = utils.to_tensor(features, modified_adj, labels, device=self.device) |
| | self.modified_adj = modified_adj |
| | self.features = features |
| | self.labels = labels |
| | super().fit(features, modified_adj, labels, idx_train, idx_val, train_iters=train_iters, initialize=initialize, verbose=verbose) |
| |
|
| | def drop_dissimilar_edges(self, features, adj, metric='similarity'): |
| | """Drop dissimilar edges.(Faster version using numba) |
| | """ |
| | if not sp.issparse(adj): |
| | adj = adj.to('cpu') |
| | adj = sp.csr_matrix(adj) |
| |
|
| | adj_triu = sp.triu(adj, format='csr') |
| |
|
| | if sp.issparse(features): |
| | features = features.todense().A |
| |
|
| | if metric == 'distance': |
| | removed_cnt = dropedge_dis(adj_triu.data, adj_triu.indptr, adj_triu.indices, features, threshold=self.threshold) |
| | else: |
| | if self.binary_feature: |
| | print("self.binary_feature", self.binary_feature) |
| | removed_cnt = dropedge_jaccard(adj_triu.data, adj_triu.indptr, adj_triu.indices, features, threshold=self.threshold) |
| | else: |
| | removed_cnt = dropedge_cosine(adj_triu.data, adj_triu.indptr, adj_triu.indices, features, threshold=self.threshold) |
| | print('removed %s edges in the original graph' % removed_cnt) |
| | modified_adj = adj_triu + adj_triu.transpose() |
| | return modified_adj |
| |
|
| | def predict(self, features=None, adj=None): |
| | """By default, the inputs should be unnormalized adjacency |
| | |
| | Parameters |
| | ---------- |
| | features : |
| | node features. If `features` and `adj` are not given, this function will use previous stored `features` and `adj` from training to make predictions. |
| | adj : |
| | adjcency matrix. If `features` and `adj` are not given, this function will use previous stored `features` and `adj` from training to make predictions. |
| | |
| | |
| | Returns |
| | ------- |
| | torch.FloatTensor |
| | output (log probabilities) of GCNJaccard |
| | """ |
| |
|
| | self.eval() |
| | if features is None and adj is None: |
| | return self.forward(self.features, self.adj_norm) |
| | else: |
| | adj = self.drop_dissimilar_edges(features, adj) |
| | if type(adj) is not torch.Tensor: |
| | features, adj = utils.to_tensor(features, adj, device=self.device) |
| |
|
| | self.features = features |
| | if utils.is_sparse_tensor(adj): |
| | self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True) |
| | else: |
| | self.adj_norm = utils.normalize_adj_tensor(adj) |
| | return self.forward(self.features, self.adj_norm) |
| |
|
| | def _drop_dissimilar_edges(self, features, adj): |
| | """Drop dissimilar edges. (Slower version) |
| | """ |
| | if not sp.issparse(adj): |
| | adj = sp.csr_matrix(adj) |
| | modified_adj = adj.copy().tolil() |
| |
|
| | |
| | print('=== GCN-Jaccrad ===') |
| | edges = np.array(modified_adj.nonzero()).T |
| | removed_cnt = 0 |
| | for edge in tqdm(edges): |
| | n1 = edge[0] |
| | n2 = edge[1] |
| | if n1 > n2: |
| | continue |
| |
|
| | if self.binary_feature: |
| | J = self._jaccard_similarity(features[n1], features[n2]) |
| |
|
| | if J < self.threshold: |
| | modified_adj[n1, n2] = 0 |
| | modified_adj[n2, n1] = 0 |
| | removed_cnt += 1 |
| | else: |
| | |
| | C = self._cosine_similarity(features[n1], features[n2]) |
| | if C < self.threshold: |
| | modified_adj[n1, n2] = 0 |
| | modified_adj[n2, n1] = 0 |
| | removed_cnt += 1 |
| | print('removed %s edges in the original graph' % removed_cnt) |
| | return modified_adj |
| |
|
| | def _jaccard_similarity(self, a, b): |
| | intersection = a.multiply(b).count_nonzero() |
| | J = intersection * 1.0 / (a.count_nonzero() + b.count_nonzero() - intersection) |
| | return J |
| |
|
| | def _cosine_similarity(self, a, b): |
| | inner_product = (a * b).sum() |
| | C = inner_product / (np.sqrt(np.square(a).sum()) * np.sqrt(np.square(b).sum()) + 1e-10) |
| | return C |
| |
|
| | def __dropedge_jaccard(A, iA, jA, features, threshold): |
| | |
| | removed_cnt = 0 |
| | for row in range(len(iA)-1): |
| | for i in range(iA[row], iA[row+1]): |
| | |
| | n1 = row |
| | n2 = jA[i] |
| | a, b = features[n1], features[n2] |
| |
|
| | intersection = a.multiply(b).count_nonzero() |
| | J = intersection * 1.0 / (a.count_nonzero() + b.count_nonzero() - intersection) |
| |
|
| | if J < threshold: |
| | A[i] = 0 |
| | |
| | removed_cnt += 1 |
| | return removed_cnt |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | @njit |
| | def dropedge_jaccard(A, iA, jA, features, threshold): |
| | removed_cnt = 0 |
| | for row in range(len(iA)-1): |
| | for i in range(iA[row], iA[row+1]): |
| | |
| | n1 = row |
| | n2 = jA[i] |
| | a, b = features[n1], features[n2] |
| | intersection = np.count_nonzero(a*b) |
| | union = np.count_nonzero(a) + np.count_nonzero(b) - intersection |
| | if union == 0: |
| | print("Running safe dropedge_jaccard") |
| | J = 0 |
| | else: |
| | J = intersection * 1.0 / union |
| |
|
| | if J < threshold: |
| | A[i] = 0 |
| | |
| | removed_cnt += 1 |
| | return removed_cnt |
| |
|
| |
|
| | @njit |
| | def dropedge_cosine(A, iA, jA, features, threshold): |
| | removed_cnt = 0 |
| | for row in range(len(iA)-1): |
| | for i in range(iA[row], iA[row+1]): |
| | |
| | n1 = row |
| | n2 = jA[i] |
| | a, b = features[n1], features[n2] |
| | inner_product = (a * b).sum() |
| | C = inner_product / (np.sqrt(np.square(a).sum()) * np.sqrt(np.square(b).sum()) + 1e-8) |
| |
|
| | if C < threshold: |
| | A[i] = 0 |
| | |
| | removed_cnt += 1 |
| | return removed_cnt |
| |
|
| | @njit |
| | def dropedge_dis(A, iA, jA, features, threshold): |
| | removed_cnt = 0 |
| | for row in range(len(iA)-1): |
| | for i in range(iA[row], iA[row+1]): |
| | |
| | n1 = row |
| | n2 = jA[i] |
| | C = np.linalg.norm(features[n1] - features[n2]) |
| | if C > threshold: |
| | A[i] = 0 |
| | |
| | removed_cnt += 1 |
| |
|
| | return removed_cnt |
| |
|
| | @njit |
| | def dropedge_both(A, iA, jA, features, threshold1=2.5, threshold2=0.01): |
| | removed_cnt = 0 |
| | for row in range(len(iA)-1): |
| | for i in range(iA[row], iA[row+1]): |
| | |
| | n1 = row |
| | n2 = jA[i] |
| | C1 = np.linalg.norm(features[n1] - features[n2]) |
| |
|
| | a, b = features[n1], features[n2] |
| | inner_product = (a * b).sum() |
| | C2 = inner_product / (np.sqrt(np.square(a).sum() + np.square(b).sum())+ 1e-6) |
| | if C1 > threshold1 or threshold2 < 0: |
| | A[i] = 0 |
| | |
| | removed_cnt += 1 |
| |
|
| | return removed_cnt |
| |
|
| |
|
| | if __name__ == "__main__": |
| | from deeprobust.graph.data import PrePtbDataset, Dataset |
| | |
| | dataset_str = 'pubmed' |
| | data = Dataset(root='/tmp/', name=dataset_str, seed=15) |
| | adj, features, labels = data.adj, data.features, data.labels |
| | idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test |
| | |
| | perturbed_data = PrePtbDataset(root='/tmp/', name=dataset_str) |
| | perturbed_adj = perturbed_data.adj |
| | |
| | print("Test GCNJaccard") |
| | model = GCNJaccard(nfeat=features.shape[1], |
| | nhid=16, |
| | nclass=labels.max().item() + 1, |
| | binary_feature=False, |
| | dropout=0.5, device='cuda').to('cuda') |
| | model.fit(features, perturbed_adj, labels, idx_train, idx_val, threshold=0.1) |
| | model.test(idx_test) |
| | prediction_1 = model.predict() |
| | prediction_2 = model.predict(features, perturbed_adj) |
| | assert (prediction_1 != prediction_2).sum() == 0 |
| |
|
| | print("Test GCNSVD") |
| | model = GCNSVD(nfeat=features.shape[1], |
| | nhid=16, |
| | nclass=labels.max().item() + 1, |
| | dropout=0.5, device='cuda').to('cuda') |
| | model.fit(features, perturbed_adj, labels, idx_train, idx_val, k=20) |
| | model.test(idx_test) |
| | prediction_1 = model.predict() |
| | prediction_2 = model.predict(features, perturbed_adj) |
| | assert (prediction_1 - prediction_2).mean() < 1e-5 |
| |
|
| |
|