"""Similarity-, metric-learning-, and proxy-based loss functions on PyTorch.

Contents: relational KL/CE distillation terms, UMAP-style relational losses,
cosine-similarity distillation, dual-softmax, a triplet-loss family
(contrastive / triplet / lifted / N-pairs / supervised contrastive), and
prototype/proxy losses (PAN, Proxy-NCA, Proxy-Anchor).
"""

import torch
import numpy as np
import torch.nn.functional as F
from torch import nn


def KL(P, Q, mask=None):
    """Sum of elementwise KL contributions P * (log(P+eps) - log(Q+eps)).

    :param P: target probability tensor
    :param Q: model probability tensor (same shape as P)
    :param mask: optional elementwise weight/mask tensor
    :return: scalar tensor, sum over all elements
    """
    eps = 0.0000001
    d = (P + eps).log() - (Q + eps).log()
    d = P * d
    # FIX: `is not None` instead of `mask != None` — the old form only worked
    # because tensor.__ne__(None) falls back to an identity comparison.
    if mask is not None:
        d = d * mask
    return torch.sum(d)


def CE(P, Q, mask=None):
    """Binary-cross-entropy-style relational term: KL(P||Q) + KL(1-P||1-Q)."""
    return KL(P, Q, mask) + KL(1 - P, 1 - Q, mask)


def algorithm2(P, Q, Y):
    """Mean elementwise KL contribution: mean(P * log((P+eps)/(Q+eps))).

    NOTE(review): the original body also computed a row-mean mask (P >= mean)
    and a label-agreement mask (Y == Y.t()) but never applied them to the
    loss; that dead code has been removed — the returned value is unchanged.
    The unused Y parameter is kept for interface compatibility.
    """
    eps = 0.0000001
    loss = torch.mean(P * torch.log((P + eps) / (Q + eps)))
    return loss


def umap(output_net, target_net, eps=0.0000001):
    """UMAP-style relational loss between two embedding batches.

    Both inputs are (n, d); rows are L2-normalized, pairwise cosine
    similarities are converted to per-row min-shifted distances and back to
    similarities, row-normalized into probabilities, and compared with CE.
    """
    # Normalize each vector by its norm
    (n, d) = output_net.shape
    output_net_norm = torch.sqrt(torch.sum(output_net ** 2, dim=1, keepdim=True))
    output_net = output_net / (output_net_norm + eps)
    output_net[output_net != output_net] = 0  # zero out NaNs from zero-norm rows

    target_net_norm = torch.sqrt(torch.sum(target_net ** 2, dim=1, keepdim=True))
    target_net = target_net / (target_net_norm + eps)
    target_net[target_net != target_net] = 0

    # Calculate the cosine similarity
    model_similarity = torch.mm(output_net, output_net.transpose(0, 1))
    model_distance = 1 - model_similarity  # in [0, 2]
    model_distance[range(n), range(n)] = 3  # push diagonal above any real distance
    model_distance = model_distance - torch.min(model_distance, dim=1)[0].view(-1, 1)
    model_distance[range(n), range(n)] = 0
    model_similarity = 1 - model_distance

    target_similarity = torch.mm(target_net, target_net.transpose(0, 1))
    target_distance = 1 - target_similarity
    target_distance[range(n), range(n)] = 3
    target_distance = target_distance - torch.min(target_distance, dim=1)[0].view(-1, 1)
    target_distance[range(n), range(n)] = 0
    target_similarity = 1 - target_distance

    # Scale cosine similarity to 0..1
    model_similarity = (model_similarity + 1.0) / 2.0
    target_similarity = (target_similarity + 1.0) / 2.0

    # Transform them into probabilities (row-normalize)
    model_similarity = model_similarity / torch.sum(model_similarity, dim=1, keepdim=True)
    target_similarity = target_similarity / torch.sum(target_similarity, dim=1, keepdim=True)

    # Calculate the KL-divergence-based CE term
    loss = CE(target_similarity, model_similarity)
    return loss


def supervised_umap(output_net, target_net, y, sample_weight=0, eps=0.0000001):
    """Label- and sample-weight-aware variant of `umap`.

    Target distances are shifted by a signed, tanh-squashed sample-weight
    matrix so same-label pairs are pulled together and different-label pairs
    pushed apart before the CE comparison.

    NOTE(review): the default sample_weight=0 cannot actually be used — the
    function immediately calls sample_weight.view(...); callers must pass a
    tensor of per-sample weights. Default kept for interface compatibility.
    """
    # Normalize each vector by its norm
    (n, d) = output_net.shape
    distance = 2.0
    tahn = nn.Tanh()
    sample_weight = sample_weight.view(-1, n)
    sample_weight_matrix = (sample_weight + sample_weight.t()) / 32.0
    sample_weight_matrix = tahn(sample_weight_matrix)
    y = y.view(-1, n)
    mask = 1 - (y == y.t()).float()  # 1 where labels differ
    mask[mask == 0] = -1             # -1 where labels match
    distance = distance * mask * sample_weight_matrix

    output_net_norm = torch.sqrt(torch.sum(output_net ** 2, dim=1, keepdim=True))
    output_net = output_net / (output_net_norm + eps)
    output_net[output_net != output_net] = 0  # zero out NaNs from zero-norm rows

    target_net_norm = torch.sqrt(torch.sum(target_net ** 2, dim=1, keepdim=True))
    target_net = target_net / (target_net_norm + eps)
    target_net[target_net != target_net] = 0

    # Calculate the cosine similarity
    model_similarity = torch.mm(output_net, output_net.transpose(0, 1))
    model_distance = 1 - model_similarity  # [0, 2]
    model_distance[range(n), range(n)] = 100000  # exclude diagonal from row-min
    model_distance = model_distance - torch.min(model_distance, dim=1)[0].view(-1, 1)
    model_distance[range(n), range(n)] = 0
    model_distance = torch.clamp(model_distance, 0 + eps, 2.0 - eps)
    model_similarity = 1 - model_distance

    target_similarity = torch.mm(target_net, target_net.transpose(0, 1))
    target_distance = 1 - target_similarity
    target_distance[range(n), range(n)] = 100000
    p = torch.min(target_distance, dim=1)
    target_distance = target_distance - p[0].view(-1, 1)
    target_distance[range(n), range(n)] = 0
    # Apply the supervised shift, then clamp back into the valid range.
    target_distance = (1 - sample_weight_matrix) * target_distance + distance
    target_distance = torch.clamp(target_distance, 0 + eps, 2.0 - eps)
    target_similarity = 1 - target_distance

    # Scale cosine similarity to 0..1
    model_similarity = (model_similarity + 1.0) / 2.0
    target_similarity = (target_similarity + 1.0) / 2.0

    # Transform them into probabilities
    model_similarity = model_similarity / torch.sum(model_similarity, dim=1, keepdim=True)
    target_similarity = target_similarity / torch.sum(target_similarity, dim=1, keepdim=True)

    # Calculate the CE-Loss
    loss = CE(target_similarity, model_similarity)
    return loss


def cosine_similarity_loss(output_net, target_net, eps=0.0000001):
    """CE between row-normalized pairwise cosine-similarity matrices."""
    # Normalize each vector by its norm
    output_net_norm = torch.sqrt(torch.sum(output_net ** 2, dim=1, keepdim=True))
    output_net = output_net / (output_net_norm + eps)
    output_net[output_net != output_net] = 0  # zero out NaNs from zero-norm rows

    target_net_norm = torch.sqrt(torch.sum(target_net ** 2, dim=1, keepdim=True))
    target_net = target_net / (target_net_norm + eps)
    target_net[target_net != target_net] = 0

    # Calculate the cosine similarity
    model_similarity = torch.mm(output_net, output_net.transpose(0, 1))
    target_similarity = torch.mm(target_net, target_net.transpose(0, 1))

    # Scale cosine similarity to 0..1
    model_similarity = (model_similarity + 1.0) / 2.0
    target_similarity = (target_similarity + 1.0) / 2.0

    # Transform them into probabilities
    model_similarity = model_similarity / torch.sum(model_similarity, dim=1, keepdim=True)
    target_similarity = target_similarity / torch.sum(target_similarity, dim=1, keepdim=True)

    # Calculate the KL-divergence-based CE term
    loss = CE(target_similarity, model_similarity)
    return loss


class dual_softmax_loss(nn.Module):
    """Dual-softmax loss over a similarity matrix (per-sample -log p on the diagonal)."""

    def __init__(self, ):
        super(dual_softmax_loss, self).__init__()

    def forward(self, sim_matrix, temp=1000):
        """Return the per-sample losses (vector, not reduced).

        :param sim_matrix: square similarity matrix
        :param temp: temperature for the column-softmax reweighting; with an
            appropriate temperature parameter, the model achieves higher
            performance
        """
        # Column softmax reweights the matrix; row log-softmax scores the diagonal.
        sim_matrix = sim_matrix * F.softmax(sim_matrix / temp, dim=0) * len(sim_matrix)
        logpt = F.log_softmax(sim_matrix, dim=-1)  # row softmax and column softmax
        logpt = torch.diag(logpt)
        loss = -logpt
        return loss


def log_sum_exp(x):
    """Numerically stable log-sum-exp over the last dim (keepdim=True).

    Utility for computing log-sum-exp; used to accumulate unaveraged
    confidence loss across all examples in a batch.
    """
    x_max = x.data.max()
    return torch.log(torch.sum(torch.exp(x - x_max), -1, keepdim=True)) + x_max


def l2norm(X, dim, eps=1e-8):
    """L2-normalize X along `dim`."""
    norm = torch.pow(X, 2).sum(dim=dim, keepdim=True).sqrt() + eps
    X = torch.div(X, norm)
    return X


def calcdist(img, txt):
    """Euclidean distance matrix.

    Input: img = (batch, dim), txt = (batch, dim).
    Output: Tensor(batch, batch) with dist[i, j] = d(img_i, txt_j).
    """
    dist = img.unsqueeze(1) - txt.unsqueeze(0)
    dist = torch.sum(torch.pow(dist, 2), dim=2)
    return torch.sqrt(dist)


def calcmatch(label):
    """Match matrix: match[i, j] == 1 iff label[i] == label[j].

    Input: label = (batch,). Output: Tensor(batch, batch).
    """
    match = label.unsqueeze(1) - label.unsqueeze(0)
    match[match != 0] = 1
    return 1 - match


def calcneg(dist, label, anchor, positive):
    """Choose a negative sample index for the (anchor, positive) pair.

    Prefers the closest negative that is at least as far as the positive
    (semi-hard); otherwise falls back to the farthest negative.
    NOTE: mutates only a local row copy (dist[anchor] - standard), not `dist`.
    """
    standard = dist[anchor, positive]        # positive distance
    dist = dist[anchor] - standard           # distance of other samples (new tensor)
    if max(dist[label != label[anchor]]) >= 0:  # there exists a valid negative
        dist[dist < 0] = max(dist) + 2       # delete negatives below the standard
        dist[label == label[anchor]] = max(dist) + 2  # delete positive samples
        return int(torch.argmin(dist).cpu())  # closest valid negative
    else:  # no negative beyond the positive: take the hardest (farthest) one
        dist[label == label[anchor]] = min(dist) - 2  # delete positive samples
        return int(torch.argmax(dist).cpu())


def calcneg_dot(img, txt, match, anchor, positive):
    """Choose the negative with the largest dot-product similarity to the anchor.

    Input: img = (batch, dim), txt = (batch, dim), match = (batch, batch),
    anchor = index, positive = index. Output: chosen negative sample index.
    """
    distdot = torch.sum(torch.mul(img.unsqueeze(1), txt.unsqueeze(0)), 2)
    distdot[match == 1] = -66666  # exclude positives from the argmax
    return int(torch.argmax(distdot[anchor]).cpu())


def Triplet(img, txt, label):
    """Build (anchor, positive, negative) triplets for a batch.

    Input: img = (batch, dim), txt = (batch, dim), label = (batch,).
    Output: dist = (batch, batch), match = (batch, batch),
    triplets = list of [anchor, positive, negative] index triples.
    """
    triplet_list = []
    batch = img.shape[0]
    dist = calcdist(img, txt)
    match = calcmatch(label)
    match_n = match.cpu().numpy()
    positive_list = np.argwhere(match_n == 1).tolist()  # all positive index pairs
    for positive in positive_list:
        negative = calcneg(dist, label, positive[0], positive[1])  # calculate negatives
        # negative = calcneg_dot(img, txt, match, anchor, positive)  # dot-product mining performs poorly
        # FIX: positive[1] is already a plain Python int (np.argwhere(...).tolist());
        # the original `int(positive[1].cuda())` raised AttributeError at runtime.
        triplet_list.append([positive[0], positive[1], negative])
    return dist, match, triplet_list


def Positive(img, txt, label):
    """Return (anchor, positive) pairs without mining triplets.

    Input: img = (batch, dim), txt = (batch, dim), label = (batch,).
    Output: dist = (batch, batch), match = (batch, batch),
    positives = list of [anchor, positive] index pairs.
    NOTE(review): requires CUDA (builds the sample index list on the GPU).
    """
    batch = img.shape[0]
    dist = calcdist(img, txt)
    match = calcmatch(label)
    sample_list = torch.tensor([x for x in range(batch)]).int().cuda()
    positive_list = [[i, int(j.cpu())] for i in range(batch) for j in sample_list[label == label[i]]]
    return dist, match, positive_list


def Modality_invariant_Loss(img, txt, label, margin=0.2):
    """Invariance loss: mean squared distance between same-class image/text pairs.

    Input: img = (batch, dim), txt = (batch, dim), label = (batch,),
    margin = unused (kept for a uniform signature).
    """
    batch = img.shape[0]
    dist = calcdist(img, txt)
    dist = torch.pow(dist, 2)
    match = calcmatch(label)  # similar is 1, dissimilar is 0
    pos = torch.mul(dist, match)
    loss = torch.sum(pos)
    return loss / batch


def Contrastive_Loss(img, txt, label, margin=0.2):
    """Contrastive loss: pull same-class pairs, push different-class pairs to `margin`.

    Input: img = (batch, dim), txt = (batch, dim), label = (batch,).
    """
    batch = img.shape[0]
    dist = calcdist(img, txt)
    dist = torch.pow(dist, 2)
    match = calcmatch(label)  # similar is 1, dissimilar is 0
    pos = torch.mul(dist, match)
    neg = margin - torch.mul(dist, 1 - match)
    neg = torch.clamp(neg, 0)
    loss = torch.sum(pos) + torch.sum(neg)
    return loss / batch


def Triplet_Loss(img, txt, label, margin=0.2, semi_hard=True):
    """Triplet loss with semi-hard (default) or hard negative mining.

    Input: img = (batch, dim), txt = (batch, dim), label = (batch,).
    """
    loss = 0
    dist = calcdist(img, txt)
    dist = torch.pow(dist, 2)
    match = calcmatch(label)
    match_n = match.cpu().numpy()
    positive = np.argwhere(match_n == 1).tolist()  # all positive index pairs
    for x in positive:
        if semi_hard:
            # Semi-hard: hinge over all negatives of the anchor at once.
            neg_index = torch.where(match[x[0]] == 0)  # negatives shared by image and text
            neg_dis = dist[x[0]][neg_index]
            tmp = dist[x[0], x[1]] - neg_dis + margin
            tmp = torch.clamp(tmp, 0)
            loss = loss + torch.sum(tmp, dim=-1)
        else:
            # Hard negative mining: a single mined negative per pair.
            negative = calcneg(dist, label, x[0], x[1])
            tmp = dist[x[0], x[1]] - dist[x[0], negative] + margin
            if tmp > 0:
                loss = loss + tmp
    return loss / len(positive)


def Lifted_Loss(img, txt, label, margin=1):
    """Lifted structured embedding loss (margin = 1 as in the original paper).

    Input: img = (batch, dim), txt = (batch, dim), label = (batch,).
    """
    dist = calcdist(img, txt)
    match = calcmatch(label)
    match_n = match.cpu().numpy()
    positive = np.argwhere(match_n == 1).tolist()  # all positive index pairs
    loss = 0
    for x in positive:
        neg_index = torch.where(match[x[0]] == 0)  # negatives shared by image and text
        neg_dis_anchor = dist[x[0]][neg_index]
        neg_dis_postive = dist[x[1]][neg_index]
        tmp = dist[x[0], x[1]] + log_sum_exp(margin - neg_dis_postive) + log_sum_exp(margin - neg_dis_anchor)
        loss = loss + tmp
    return loss / (2 * len(positive))


def Npairs(img, txt, label, margin=0.2, alpha=0.1):
    """N-pairs loss (both directions) plus an L2 feature-norm penalty.

    Input: img = (batch, dim), txt = (batch, dim), label = (batch,),
    margin = unused (kept for a uniform signature), alpha = norm-penalty weight.
    """
    batch = img.shape[0]
    distdot_it = torch.exp(F.linear(img, txt))  # exp of image->text dot products
    distdot_ti = torch.t(distdot_it)            # text->image direction
    match = calcmatch(label)
    match_n = match.cpu().numpy()
    positive = np.argwhere(match_n == 1).tolist()  # all positive index pairs
    loss = 0
    for x in positive:
        neg_index = torch.where(match[x[0]] == 0)  # negatives of the anchor
        tmp_i2t = distdot_it[x[0], x[1]] - log_sum_exp(distdot_it[x[0]][neg_index])
        tmp_t2i = distdot_ti[x[0], x[1]] - log_sum_exp(distdot_ti[x[0]][neg_index])
        loss = loss + (tmp_i2t + tmp_t2i) / 2
    loss = -loss / len(positive)
    for x in range(batch):
        loss = loss + alpha * (torch.norm(img[x]) + torch.norm(txt[x])) / batch
    return loss


def Supervised_Contrastive_Loss(img, txt, label):
    """Unofficial supervised contrastive loss for multimodal learning.

    Input: img = (batch, dim), txt = (batch, dim), label = (batch,).
    """
    batch = img.shape[0]
    dist = calcdist(img, txt)
    dist = torch.pow(dist, 2)
    dist = dist / (torch.sum(dist) / (batch * batch))  # scale the metric
    match = calcmatch(label)  # similar is 1, dissimilar is 0
    match_n = match.cpu().numpy()
    positive = np.argwhere(match_n == 1).tolist()  # all positive index pairs
    loss = 0
    for x in positive:
        neg_index = torch.where(match[x[0]] == 0)  # negatives of the anchor
        pos_sim = -dist[x[0], x[1]]
        neg_sims = -dist[x[0]][neg_index]
        tmp = pos_sim - log_sum_exp(neg_sims)
        loss = loss + tmp
    loss = -loss / len(positive)
    return loss


def regularization(features, centers, labels):
    """Mean squared distance between features and their assigned class centers."""
    # features = l2norm(features, dim=-1)
    distance = (features - centers[labels])
    distance = torch.sum(torch.pow(distance, 2), 1, keepdim=True)
    distance = (torch.sum(distance, 0, keepdim=True)) / features.shape[0]
    return distance


def PAN(features, centers, labels, add_regularization=False):
    """Prototype contrastive loss (+ optional regularization) from PAN
    (https://dl.acm.org/doi/abs/10.1145/3404835.3462867)."""
    batch = features.shape[0]
    features_square = torch.sum(torch.pow(features, 2), 1, keepdim=True)  # squared norm per sample
    centers_square = torch.sum(torch.pow(torch.t(centers), 2), 0, keepdim=True)
    features_into_centers = 2 * torch.matmul(features, torch.t(centers))
    dist = -(features_square + centers_square - features_into_centers)  # negative squared distances
    output = F.log_softmax(dist, dim=1)
    dce_loss = F.nll_loss(output, labels)
    # FIX: the original computed dce_loss + reg_loss and then unconditionally
    # overwrote it with dce_loss, making add_regularization a silent no-op.
    if add_regularization:
        reg_loss = regularization(features, centers, labels)
        loss = dce_loss + reg_loss
    else:
        loss = dce_loss
    return loss / batch


def Label_Regression_Loss(view1_predict, view2_predict, label_onehot):
    """Mean Euclidean distance of both views' predictions to the one-hot labels."""
    loss = ((view1_predict - label_onehot.float()) ** 2).sum(1).sqrt().mean() + (
        (view2_predict - label_onehot.float()) ** 2).sum(1).sqrt().mean()
    return loss


def Proxy_NCA(features, label, proxies, mrg=0.1, alpha=1):
    """Proxy-NCA loss (similar to Normalized Softmax and PAN, but the
    denominator excludes the positive proxy).

    :param features: [2*batch, dim] concatenated image and text features
    :param label: [2*batch] class indices
    :param proxies: [feature_dim, n_classes]
    :param mrg, alpha: unused (kept for a signature parallel to Proxy_Anchor)
    :return: Proxy-NCA loss
    """
    P = torch.t(proxies)  # [feature_dim, n_classes] --> [n_classes, feature_dim]
    n_classes = P.shape[0]
    cos = F.linear(features, P)  # similarity scores [batch, n_classes]
    loss = 0
    for x in range(features.shape[0]):
        pos = torch.exp(cos[x, label[x]])
        neg = torch.exp(cos[x]).sum(dim=-1) - pos
        loss = loss + torch.log(pos / neg)
    loss = -loss / features.shape[0]
    return loss


def Proxy_Anchor(features, label, proxies, mrg=0.1, alpha=1):
    """Proxy-Anchor loss.

    :param features: [2*batch, dim] concatenated image and text features
    :param label: one-hot label matrix [batch, n_classes] (used as P_one_hot)
    :param proxies: [feature_dim, n_classes]
    :param mrg: margin; :param alpha: scaling factor
    :return: Proxy-Anchor loss
    """
    P = torch.t(proxies)  # [feature_dim, n_classes] --> [n_classes, feature_dim]
    n_classes = P.shape[0]
    cos = F.linear(features, P)  # cosine-style similarity [batch, n_classes]
    P_one_hot = label  # [batch, n_classes]
    N_one_hot = 1 - P_one_hot
    pos_exp = torch.exp(-alpha * (cos - mrg))  # [batch, n_classes]
    # NOTE(review): can overflow (e.g. e+30) and produce NaN for large alpha*cos.
    neg_exp = torch.exp(alpha * (cos + mrg))
    # Proxies that have at least one positive sample in the batch.
    with_pos_proxies = torch.nonzero(P_one_hot.sum(dim=0) != 0).squeeze(dim=1)
    num_valid_proxies = len(with_pos_proxies)
    P_sim_sum = torch.where(P_one_hot == 1, pos_exp, torch.zeros_like(pos_exp)).sum(dim=0)
    N_sim_sum = torch.where(N_one_hot == 1, neg_exp, torch.zeros_like(neg_exp)).sum(dim=0)
    pos_term = torch.log(1 + P_sim_sum).sum() / num_valid_proxies
    neg_term = torch.log(1 + N_sim_sum).sum() / n_classes
    loss = pos_term + neg_term
    return loss