import torch.nn as nn import torch.nn.functional as F import math import torch from torch.nn.parameter import Parameter from torch.nn.modules.module import Module from deeprobust.graph import utils from deeprobust.graph.defense import GCN from sklearn.utils.extmath import randomized_svd from tqdm import tqdm import scipy.sparse as sp import numpy as np from numba import njit class GCNSVD(GCN): """GCNSVD is a 2 Layer Graph Convolutional Network with Truncated SVD as preprocessing. See more details in All You Need Is Low (Rank): Defending Against Adversarial Attacks on Graphs, https://dl.acm.org/doi/abs/10.1145/3336191.3371789. Parameters ---------- nfeat : int size of input feature dimension nhid : int number of hidden units nclass : int size of output dimension dropout : float dropout rate for GCN lr : float learning rate for GCN weight_decay : float weight decay coefficient (l2 normalization) for GCN. When `with_relu` is True, `weight_decay` will be set to 0. with_relu : bool whether to use relu activation function. If False, GCN will be linearized. with_bias: bool whether to include bias term in GCN weights. device: str 'cpu' or 'cuda'. Examples -------- We can first load dataset and then train GCNSVD. >>> from deeprobust.graph.data import PrePtbDataset, Dataset >>> from deeprobust.graph.defense import GCNSVD >>> # load clean graph data >>> data = Dataset(root='/tmp/', name='cora', seed=15) >>> adj, features, labels = data.adj, data.features, data.labels >>> idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test >>> # load perturbed graph data >>> perturbed_data = PrePtbDataset(root='/tmp/', name='cora') >>> perturbed_adj = perturbed_data.adj >>> # train defense model >>> model = GCNSVD(nfeat=features.shape[1], nhid=16, nclass=labels.max().item() + 1, dropout=0.5, device='cpu').to('cpu') >>> model.fit(features, perturbed_adj, labels, idx_train, idx_val, k=20) """ def __init__(self, nfeat, nhid, nclass, dropout=0.5, lr=0.01, weight_decay=5e-4, with_relu=True, with_bias=True, device='cpu'): super(GCNSVD, self).__init__(nfeat, nhid, nclass, dropout, lr, weight_decay, with_relu, with_bias, device=device) self.device = device self.k = None def fit(self, features, adj, labels, idx_train, idx_val=None, k=50, train_iters=200, initialize=True, verbose=True, **kwargs): """First perform rank-k approximation of adjacency matrix via truncated SVD, and then train the gcn model on the processed graph, when idx_val is not None, pick the best model according to the validation loss. Parameters ---------- features : node features adj : the adjacency matrix. The format could be torch.tensor or scipy matrix labels : node labels idx_train : node training indices idx_val : node validation indices. If not given (None), GCN training process will not adpot early stopping k : int number of singular values and vectors to compute. train_iters : int number of training epochs initialize : bool whether to initialize parameters before training verbose : bool whether to show verbose logs """ adj = adj.to('cpu') modified_adj = self.truncatedSVD(adj, k=k) self.k = k # modified_adj_tensor = utils.sparse_mx_to_torch_sparse_tensor(self.modified_adj) features, modified_adj, labels = utils.to_tensor(features, modified_adj, labels, device=self.device) self.modified_adj = modified_adj self.features = features self.labels = labels super().fit(features, modified_adj, labels, idx_train, idx_val, train_iters=train_iters, initialize=initialize, verbose=verbose) # def truncatedSVD(self, data, k=50): # """Truncated SVD on input data. # Parameters # ---------- # data : # input matrix to be decomposed # k : int # number of singular values and vectors to compute. # Returns # ------- # numpy.array # reconstructed matrix. # """ # print('=== GCN-SVD: rank={} ==='.format(k)) # print("stage1") # if sp.issparse(data): # data = data.asfptype() # U, S, V = sp.linalg.svds(data, k=k) # print("rank_after = {}".format(len(S.nonzero()[0]))) # diag_S = np.diag(S) # else: # U, S, V = np.linalg.svd(data) # U = U[:, :k] # S = S[:k] # V = V[:k, :] # print("rank_before = {}".format(len(S.nonzero()[0]))) # diag_S = np.diag(S) # print("rank_after = {}".format(len(diag_S.nonzero()[0]))) # return U @ diag_S @ V def truncatedSVD(self, data, k=50): """Fast truncated SVD using randomized algorithm""" print(f'=== Fast SVD: rank={k} ===') # Convert torch Tensor to NumPy if isinstance(data, torch.Tensor): data = data.detach().cpu().numpy() U, S, Vt = randomized_svd(data, n_components=k, n_iter=10, random_state=None) print("rank_after =", np.count_nonzero(S)) diag_S = np.diag(S) return U @ diag_S @ Vt # def truncatedSVD(self, data, k=50): # """Truncated SVD on input data using GPU (torch.linalg.svd), returning numpy array.""" # print('=== GCN-SVD (GPU): rank={} ==='.format(k)) # # 转换为 dense 并放到 GPU # if sp.issparse(data): # print("[Info] Converting sparse matrix to dense (may be memory-intensive).") # data = torch.from_numpy(data.toarray()).float().to(self.device) # elif isinstance(data, np.ndarray): # data = torch.from_numpy(data).float().to(self.device) # elif isinstance(data, torch.Tensor): # data = data.float().to(self.device) # else: # raise TypeError("Unsupported data type for SVD.") # print("stage1") # # # U, S, Vh = torch.linalg.svd(data, full_matrices=False) # print("stage2") # # # U_k = U[:, :k] # S_k = S[:k] # V_k = Vh[:k, :] # print(f"Singular values used (non-zero count): {(S_k != 0).sum().item()} / {len(S_k)}") # # 还原矩阵:U * S * V # diag_S = torch.diag(S_k) # reconstructed = U_k @ diag_S @ V_k # # 返回 numpy 类型(保持一致) # return reconstructed.detach().cpu().numpy() def predict(self, features=None, adj=None): """By default, the inputs should be unnormalized adjacency Parameters ---------- features : node features. If `features` and `adj` are not given, this function will use previous stored `features` and `adj` from training to make predictions. adj : adjcency matrix. If `features` and `adj` are not given, this function will use previous stored `features` and `adj` from training to make predictions. Returns ------- torch.FloatTensor output (log probabilities) of GCNSVD """ self.eval() if features is None and adj is None: return self.forward(self.features, self.adj_norm) else: adj = self.truncatedSVD(adj, k=self.k) if type(adj) is not torch.Tensor: features, adj = utils.to_tensor(features, adj, device=self.device) self.features = features if utils.is_sparse_tensor(adj): self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True) else: self.adj_norm = utils.normalize_adj_tensor(adj) return self.forward(self.features, self.adj_norm) class GCNJaccard(GCN): """GCNJaccard first preprocesses input graph via droppining dissimilar edges and train a GCN based on the processed graph. See more details in Adversarial Examples on Graph Data: Deep Insights into Attack and Defense, https://arxiv.org/pdf/1903.01610.pdf. Parameters ---------- nfeat : int size of input feature dimension nhid : int number of hidden units nclass : int size of output dimension dropout : float dropout rate for GCN lr : float learning rate for GCN weight_decay : float weight decay coefficient (l2 normalization) for GCN. When `with_relu` is True, `weight_decay` will be set to 0. with_relu : bool whether to use relu activation function. If False, GCN will be linearized. with_bias: bool whether to include bias term in GCN weights. device: str 'cpu' or 'cuda'. Examples -------- We can first load dataset and then train GCNJaccard. >>> from deeprobust.graph.data import PrePtbDataset, Dataset >>> from deeprobust.graph.defense import GCNJaccard >>> # load clean graph data >>> data = Dataset(root='/tmp/', name='cora', seed=15) >>> adj, features, labels = data.adj, data.features, data.labels >>> idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test >>> # load perturbed graph data >>> perturbed_data = PrePtbDataset(root='/tmp/', name='cora') >>> perturbed_adj = perturbed_data.adj >>> # train defense model >>> model = GCNJaccard(nfeat=features.shape[1], nhid=16, nclass=labels.max().item() + 1, dropout=0.5, device='cpu').to('cpu') >>> model.fit(features, perturbed_adj, labels, idx_train, idx_val, threshold=0.03) """ def __init__(self, nfeat, nhid, nclass, binary_feature=True, dropout=0.5, lr=0.01, weight_decay=5e-4, with_relu=True, with_bias=True, device='cpu'): super(GCNJaccard, self).__init__(nfeat, nhid, nclass, dropout, lr, weight_decay, with_relu, with_bias, device=device) self.device = device self.binary_feature = binary_feature def fit(self, features, adj, labels, idx_train, idx_val=None, threshold=0.01, train_iters=200, initialize=True, verbose=True, **kwargs): """First drop dissimilar edges with similarity smaller than given threshold and then train the gcn model on the processed graph. When idx_val is not None, pick the best model according to the validation loss. Parameters ---------- features : node features. The format can be numpy.array or scipy matrix adj : the adjacency matrix. labels : node labels idx_train : node training indices idx_val : node validation indices. If not given (None), GCN training process will not adpot early stopping threshold : float similarity threshold for dropping edges. If two connected nodes with similarity smaller than threshold, the edge between them will be removed. train_iters : int number of training epochs initialize : bool whether to initialize parameters before training verbose : bool whether to show verbose logs """ self.threshold = threshold modified_adj = self.drop_dissimilar_edges(features, adj) # modified_adj_tensor = utils.sparse_mx_to_torch_sparse_tensor(self.modified_adj) features, modified_adj, labels = utils.to_tensor(features, modified_adj, labels, device=self.device) self.modified_adj = modified_adj self.features = features self.labels = labels super().fit(features, modified_adj, labels, idx_train, idx_val, train_iters=train_iters, initialize=initialize, verbose=verbose) def drop_dissimilar_edges(self, features, adj, metric='similarity'): """Drop dissimilar edges.(Faster version using numba) """ if not sp.issparse(adj): adj = adj.to('cpu') adj = sp.csr_matrix(adj) adj_triu = sp.triu(adj, format='csr') if sp.issparse(features): features = features.todense().A # make it easier for njit processing if metric == 'distance': removed_cnt = dropedge_dis(adj_triu.data, adj_triu.indptr, adj_triu.indices, features, threshold=self.threshold) else: if self.binary_feature: print("self.binary_feature", self.binary_feature) removed_cnt = dropedge_jaccard(adj_triu.data, adj_triu.indptr, adj_triu.indices, features, threshold=self.threshold) else: removed_cnt = dropedge_cosine(adj_triu.data, adj_triu.indptr, adj_triu.indices, features, threshold=self.threshold) print('removed %s edges in the original graph' % removed_cnt) modified_adj = adj_triu + adj_triu.transpose() return modified_adj def predict(self, features=None, adj=None): """By default, the inputs should be unnormalized adjacency Parameters ---------- features : node features. If `features` and `adj` are not given, this function will use previous stored `features` and `adj` from training to make predictions. adj : adjcency matrix. If `features` and `adj` are not given, this function will use previous stored `features` and `adj` from training to make predictions. Returns ------- torch.FloatTensor output (log probabilities) of GCNJaccard """ self.eval() if features is None and adj is None: return self.forward(self.features, self.adj_norm) else: adj = self.drop_dissimilar_edges(features, adj) if type(adj) is not torch.Tensor: features, adj = utils.to_tensor(features, adj, device=self.device) self.features = features if utils.is_sparse_tensor(adj): self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True) else: self.adj_norm = utils.normalize_adj_tensor(adj) return self.forward(self.features, self.adj_norm) def _drop_dissimilar_edges(self, features, adj): """Drop dissimilar edges. (Slower version) """ if not sp.issparse(adj): adj = sp.csr_matrix(adj) modified_adj = adj.copy().tolil() # preprocessing based on features print('=== GCN-Jaccrad ===') edges = np.array(modified_adj.nonzero()).T removed_cnt = 0 for edge in tqdm(edges): n1 = edge[0] n2 = edge[1] if n1 > n2: continue if self.binary_feature: J = self._jaccard_similarity(features[n1], features[n2]) if J < self.threshold: modified_adj[n1, n2] = 0 modified_adj[n2, n1] = 0 removed_cnt += 1 else: # For not binary feature, use cosine similarity C = self._cosine_similarity(features[n1], features[n2]) if C < self.threshold: modified_adj[n1, n2] = 0 modified_adj[n2, n1] = 0 removed_cnt += 1 print('removed %s edges in the original graph' % removed_cnt) return modified_adj def _jaccard_similarity(self, a, b): intersection = a.multiply(b).count_nonzero() J = intersection * 1.0 / (a.count_nonzero() + b.count_nonzero() - intersection) return J def _cosine_similarity(self, a, b): inner_product = (a * b).sum() C = inner_product / (np.sqrt(np.square(a).sum()) * np.sqrt(np.square(b).sum()) + 1e-10) return C def __dropedge_jaccard(A, iA, jA, features, threshold): # deprecated: for sparse feature matrix... removed_cnt = 0 for row in range(len(iA)-1): for i in range(iA[row], iA[row+1]): # print(row, jA[i], A[i]) n1 = row n2 = jA[i] a, b = features[n1], features[n2] intersection = a.multiply(b).count_nonzero() J = intersection * 1.0 / (a.count_nonzero() + b.count_nonzero() - intersection) if J < threshold: A[i] = 0 # A[n2, n1] = 0 removed_cnt += 1 return removed_cnt # @njit # def dropedge_jaccard(A, iA, jA, features, threshold): # removed_cnt = 0 # for row in range(len(iA)-1): # for i in range(iA[row], iA[row+1]): # # print(row, jA[i], A[i]) # n1 = row # n2 = jA[i] # a, b = features[n1], features[n2] # intersection = np.count_nonzero(a*b) # J = intersection * 1.0 / (np.count_nonzero(a) + np.count_nonzero(b) - intersection) # if J < threshold: # A[i] = 0 # # A[n2, n1] = 0 # removed_cnt += 1 # return removed_cnt @njit def dropedge_jaccard(A, iA, jA, features, threshold): removed_cnt = 0 for row in range(len(iA)-1): for i in range(iA[row], iA[row+1]): # print(row, jA[i], A[i]) n1 = row n2 = jA[i] a, b = features[n1], features[n2] intersection = np.count_nonzero(a*b) union = np.count_nonzero(a) + np.count_nonzero(b) - intersection if union == 0: print("Running safe dropedge_jaccard") J = 0 else: J = intersection * 1.0 / union if J < threshold: A[i] = 0 # A[n2, n1] = 0 removed_cnt += 1 return removed_cnt @njit def dropedge_cosine(A, iA, jA, features, threshold): removed_cnt = 0 for row in range(len(iA)-1): for i in range(iA[row], iA[row+1]): # print(row, jA[i], A[i]) n1 = row n2 = jA[i] a, b = features[n1], features[n2] inner_product = (a * b).sum() C = inner_product / (np.sqrt(np.square(a).sum()) * np.sqrt(np.square(b).sum()) + 1e-8) if C < threshold: A[i] = 0 # A[n2, n1] = 0 removed_cnt += 1 return removed_cnt @njit def dropedge_dis(A, iA, jA, features, threshold): removed_cnt = 0 for row in range(len(iA)-1): for i in range(iA[row], iA[row+1]): # print(row, jA[i], A[i]) n1 = row n2 = jA[i] C = np.linalg.norm(features[n1] - features[n2]) if C > threshold: A[i] = 0 # A[n2, n1] = 0 removed_cnt += 1 return removed_cnt @njit def dropedge_both(A, iA, jA, features, threshold1=2.5, threshold2=0.01): removed_cnt = 0 for row in range(len(iA)-1): for i in range(iA[row], iA[row+1]): # print(row, jA[i], A[i]) n1 = row n2 = jA[i] C1 = np.linalg.norm(features[n1] - features[n2]) a, b = features[n1], features[n2] inner_product = (a * b).sum() C2 = inner_product / (np.sqrt(np.square(a).sum() + np.square(b).sum())+ 1e-6) if C1 > threshold1 or threshold2 < 0: A[i] = 0 # A[n2, n1] = 0 removed_cnt += 1 return removed_cnt if __name__ == "__main__": from deeprobust.graph.data import PrePtbDataset, Dataset # load clean graph data dataset_str = 'pubmed' data = Dataset(root='/tmp/', name=dataset_str, seed=15) adj, features, labels = data.adj, data.features, data.labels idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test # load perturbed graph data perturbed_data = PrePtbDataset(root='/tmp/', name=dataset_str) perturbed_adj = perturbed_data.adj # train defense model print("Test GCNJaccard") model = GCNJaccard(nfeat=features.shape[1], nhid=16, nclass=labels.max().item() + 1, binary_feature=False, dropout=0.5, device='cuda').to('cuda') model.fit(features, perturbed_adj, labels, idx_train, idx_val, threshold=0.1) model.test(idx_test) prediction_1 = model.predict() prediction_2 = model.predict(features, perturbed_adj) assert (prediction_1 != prediction_2).sum() == 0 print("Test GCNSVD") model = GCNSVD(nfeat=features.shape[1], nhid=16, nclass=labels.max().item() + 1, dropout=0.5, device='cuda').to('cuda') model.fit(features, perturbed_adj, labels, idx_train, idx_val, k=20) model.test(idx_test) prediction_1 = model.predict() prediction_2 = model.predict(features, perturbed_adj) assert (prediction_1 - prediction_2).mean() < 1e-5