# Robust_vlm/adv_clip_loss.py
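"""Adversarial fine-tuning losses for robust CLIP.

Implements several fine-tuning objectives against an l_inf PGD adversary:
TeCoA (PGD-AT), PMG (prediction alignment), joint image/text PGD,
a TRADES-style variant, and FARE (embedding alignment), plus a
reweighted FARE variant.
"""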
import torch
import torch.backends.cudnn as cudnn
from torch.cuda.amp import GradScaler, autocast
from modified_clip import clip
from models.model import *
from models.prompters import TokenPrompter, NullPrompter, PromptLearner
from attacks import *
import torch.nn.functional as F
import numpy as np
import torch.nn as nn
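# Note: the star imports above are assumed to provide the helpers used below,
# presumably clip_img_preprocessing, multiGPU_CLIP, multiGPU_CLIP_multiply_noise,
# and multiGPU_CLIP_Text_Prompt_Tuning from models.model, and attack_pgd,
# attack_pgd_adv_prompt, attack_TRADES_KL, and attack_FARE_Emb_L2 from attacks.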
def FT_TeCoA_loss(images, target, text_tokens, optimizer, model, original_model,
prompter, add_prompter, prompt_learner, args):
"""
TeCoA (PGD-AT) for Fine-tuning
"""
criterion = torch.nn.CrossEntropyLoss().cuda()
if args.add_prompt_size == 0: # visual prompt token -- token level
prompt_token = None
else:
prompt_token = add_prompter()
alpha = args.train_stepsize
attack_iters = args.train_numsteps
### Adv generation ###
delta = attack_pgd(prompter, model, add_prompter, criterion, images,
target, text_tokens, alpha, attack_iters, 'l_inf', epsilon=args.train_eps)
adv_img = clip_img_preprocessing(images + delta)
prompted_adv_images = prompter(adv_img)
### Adv generation ###
# Classification loss
output_Iadv_Tnat, _ = multiGPU_CLIP(model, prompted_adv_images, text_tokens, prompt_token)
loss_cls = criterion(output_Iadv_Tnat, target)
loss = loss_cls
return loss, output_Iadv_Tnat
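# --- Usage sketch (hypothetical; the repo's actual training loop lives elsewhere) ---
# Every FT_*_loss in this file shares the same signature and returns
# (loss, adversarial_logits), so a training step would look roughly like:
#
#     loss, output = FT_TeCoA_loss(images, target, text_tokens, optimizer,
#                                  model, original_model, prompter,
#                                  add_prompter, prompt_learner, args)
#     optimizer.zero_grad()
#     loss.backward()
#     optimizer.step()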
def FT_PMG_loss(images, target, text_tokens, optimizer, model, original_model,
prompter, add_prompter, prompt_learner, args):
"""
PMG (Aligning adv logits to the original CLIP model) for Fine-tuning
"""
criterion = torch.nn.CrossEntropyLoss().cuda()
if args.add_prompt_size == 0: # visual prompt token -- token level
prompt_token = None
else:
prompt_token = add_prompter()
alpha = args.train_stepsize
attack_iters = args.train_numsteps
### Adv generation ###
delta = attack_pgd(prompter, model, add_prompter, criterion, images,
target, text_tokens, alpha, attack_iters, 'l_inf', epsilon=args.train_eps)
adv_img = clip_img_preprocessing(images + delta)
prompted_adv_images = prompter(adv_img)
### Adv generation ###
### Clean sample and its prediction ###
nat_img = clip_img_preprocessing(images)
prompted_nat_images = prompter(nat_img)
with torch.no_grad():
Ori_output_Inat_Tnat, _ = multiGPU_CLIP(original_model, prompted_nat_images, text_tokens, prompt_token)
### Clean sample and its prediction ###
# Classification loss
output_Iadv_Tnat, _ = multiGPU_CLIP(model, prompted_adv_images, text_tokens, prompt_token)
loss_cls = criterion(output_Iadv_Tnat, target)
# Pred Alignment to the original model loss
criterion_KL = nn.KLDivLoss(reduction='batchmean').cuda()
loss_Pred_Align_Ori = criterion_KL(F.log_softmax(output_Iadv_Tnat, dim=1),
F.softmax(Ori_output_Inat_Tnat, dim=1))
# Pred Alignment (for the current model) loss
output_Inat_Tnat, _ = multiGPU_CLIP(model, prompted_nat_images, text_tokens, prompt_token)
loss_Pred_Align = criterion_KL(F.log_softmax(output_Iadv_Tnat, dim=1),
F.softmax(output_Inat_Tnat, dim=1))
loss = loss_cls + args.W_Pred_Align * loss_Pred_Align + args.W_Pred_Align_Ori * loss_Pred_Align_Ori
return loss, output_Iadv_Tnat
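# PMG objective written out: with adversarial logits z_adv, clean logits z_nat
# (current model), and clean logits z_ori (frozen original CLIP),
#   loss = CE(z_adv, y)
#        + W_Pred_Align     * KL(softmax(z_nat) || softmax(z_adv))
#        + W_Pred_Align_Ori * KL(softmax(z_ori) || softmax(z_adv)),
# since nn.KLDivLoss(log_softmax(z_adv), softmax(z_*)) computes KL(z_* || z_adv).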
def FT_ImgText_PGD_loss(images, target, text_tokens, optimizer, model, original_model,
prompter, add_prompter, prompt_learner, args):
"""
PGD-AT + Ori_Pred_Align (Adversarial Imgs and Texts) for Fine-tuning
"""
criterion = torch.nn.CrossEntropyLoss().cuda()
if args.add_prompt_size == 0: # visual prompt token -- token level
prompt_token = None
else:
prompt_token = add_prompter()
alpha = args.train_stepsize
attack_iters = args.train_numsteps
### Clean sample and its prediction ###
nat_img = clip_img_preprocessing(images)
prompted_nat_images = prompter(nat_img)
with torch.no_grad():
Ori_output_Inat_Tnat, _ = multiGPU_CLIP(original_model, prompted_nat_images, text_tokens, prompt_token)
### Clean sample and its prediction ###
### Adv (Img & Text) generation ###
prompt_learner.load_state_dict(args.original_prompter_state)
delta = attack_pgd_adv_prompt(prompter, model, add_prompter, criterion, images,
target, text_tokens, alpha, attack_iters, 'l_inf',
prompt_learner, args.text_perb_stepsize, epsilon=args.train_eps)
adv_img = clip_img_preprocessing(images + delta)
prompted_adv_images = prompter(adv_img)
### Adv (Img & Text) generation ###
output_Iadv_Tnat, _ = multiGPU_CLIP_Text_Prompt_Tuning(model, prompted_adv_images, text_tokens, prompt_token, prompt_learner)
# Classification loss
loss_cls = criterion(output_Iadv_Tnat, target)
# Pred Alignment to the original model loss
criterion_KL = nn.KLDivLoss(reduction='batchmean').cuda()
loss_Pred_Align_Ori = criterion_KL(F.log_softmax(output_Iadv_Tnat, dim=1),
F.softmax(Ori_output_Inat_Tnat, dim=1))
loss = loss_cls + args.W_Pred_Align_Ori * loss_Pred_Align_Ori
return loss, output_Iadv_Tnat
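# Note: prompt_learner is reset to args.original_prompter_state before each
# attack, so the adversarial text perturbation (step size
# args.text_perb_stepsize) always starts from the same prompt initialization
# instead of accumulating across batches.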
def FT_TRADES_loss(images, target, text_tokens, optimizer, model, original_model,
prompter, add_prompter, prompt_learner, args):
"""
TRADES (Pred Align to Original CLIP) for Fine-tuning
"""
criterion = torch.nn.CrossEntropyLoss().cuda()
if args.add_prompt_size == 0: # visual prompt token -- token level
prompt_token = None
else:
prompt_token = add_prompter()
alpha = args.train_stepsize
attack_iters = args.train_numsteps
### Clean sample and its prediction ###
nat_img = clip_img_preprocessing(images)
prompted_nat_images = prompter(nat_img)
with torch.no_grad():
Ori_output_Inat_Tnat, _ = multiGPU_CLIP(original_model, prompted_nat_images, text_tokens, prompt_token)
### Clean sample and its prediction ###
### Adv generation ###
delta = attack_TRADES_KL(prompter, model, add_prompter, criterion, images,
target, text_tokens, alpha, attack_iters, 'l_inf',
Ori_output_Inat_Tnat, epsilon=args.train_eps)
adv_img = clip_img_preprocessing(images + delta)
prompted_adv_images = prompter(adv_img)
### Adv generation ###
# Multiplicative noise for image and text embeddings
if args.mul_noise_beta > 0.0:
output_Iadv_Tnat, _ = multiGPU_CLIP_multiply_noise(model, prompted_adv_images, text_tokens, prompt_token, beta=args.mul_noise_beta)
else:
output_Iadv_Tnat, _ = multiGPU_CLIP(model, prompted_adv_images, text_tokens, prompt_token)
# Nat classification loss
output_Inat_Tnat, _ = multiGPU_CLIP(model, prompted_nat_images, text_tokens, prompt_token)
loss_nat_cls = criterion(output_Inat_Tnat, target)
# Pred Alignment to the original model loss
criterion_KL = nn.KLDivLoss(reduction='batchmean').cuda()
loss_Pred_Align_Ori = criterion_KL(F.log_softmax(output_Iadv_Tnat, dim=1),
F.softmax(Ori_output_Inat_Tnat, dim=1))
loss = loss_nat_cls + args.W_Pred_Align_Ori * loss_Pred_Align_Ori
return loss, output_Iadv_Tnat
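# Unlike standard TRADES, which pairs clean CE with KL(clean || adv) under the
# *current* model, this variant anchors the KL term to the frozen original
# CLIP's clean predictions:
#   loss = CE(z_nat, y) + W_Pred_Align_Ori * KL(softmax(z_ori) || softmax(z_adv)).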
def criterion_L2(out, targets, reduction='mean'):
    # Squared L2 distance per sample; note it does not divide by the latent
    # dimension. `out` and `targets` should have shape (batch_size, embedding_size).
    squared_error_batch = F.mse_loss(out, targets, reduction='none').sum(dim=1)
    if reduction == 'mean':
        return squared_error_batch.mean()
    return squared_error_batch  # per-sample losses when reduction != 'mean'
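# Quick sanity check (illustrative only): two 3-dim embeddings at distance 1
# per coordinate give a per-sample squared L2 of 3, hence a batch mean of 3:
#   criterion_L2(torch.zeros(2, 3), torch.ones(2, 3))  # -> tensor(3.)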
def FT_FARE_loss(images, target, text_tokens, optimizer, model, original_model,
prompter, add_prompter, prompt_learner, args):
"""
FARE (Embedding Alignment (min-max) to Original CLIP) for Fine-tuning
"""
criterion = torch.nn.CrossEntropyLoss().cuda()
if args.add_prompt_size == 0: # visual prompt token -- token level
prompt_token = None
else:
prompt_token = add_prompter()
alpha = args.train_stepsize
attack_iters = args.train_numsteps
### Clean sample and its prediction ###
nat_img = clip_img_preprocessing(images)
prompted_nat_images = prompter(nat_img)
with torch.no_grad():
Ori_output_Inat_Tnat, _, Ori_emb_Inat_Tnat, _ = multiGPU_CLIP(original_model, prompted_nat_images, text_tokens, prompt_token, is_embedding=True)
### Clean sample and its prediction ###
### Adv generation ###
delta = attack_FARE_Emb_L2(prompter, model, add_prompter, criterion, images,
target, text_tokens, alpha, attack_iters, 'l_inf',
Ori_emb_Inat_Tnat, epsilon=args.train_eps)
adv_img = clip_img_preprocessing(images + delta)
prompted_adv_images = prompter(adv_img)
### Adv generation ###
    # Multiplicative noise for image and text embeddings
    if args.mul_noise_beta > 0.0:
        output_Iadv_Tnat, _ = multiGPU_CLIP_multiply_noise(model, prompted_adv_images, text_tokens, prompt_token, beta=args.mul_noise_beta)
        # The noisy forward pass does not return embeddings, so compute them
        # separately (as in FT_FARE_loss_weighted below); otherwise
        # emb_Iadv_Tnat would be undefined in this branch.
        _, _, emb_Iadv_Tnat, _ = multiGPU_CLIP(model, prompted_adv_images, text_tokens, prompt_token, is_embedding=True)
    else:
        output_Iadv_Tnat, _, emb_Iadv_Tnat, _ = multiGPU_CLIP(model, prompted_adv_images, text_tokens, prompt_token, is_embedding=True)
    loss = criterion_L2(emb_Iadv_Tnat, Ori_emb_Inat_Tnat)
return loss, output_Iadv_Tnat
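# FARE objective in full: the inner maximization (attack_FARE_Emb_L2,
# presumably) finds the perturbation delta that maximizes
# ||f(x + delta) - f_ori(x)||^2 in embedding space, and the outer loss
# minimizes that same distance, so no labels enter the fine-tuning loss itself.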
def FT_FARE_loss_weighted(images, target, text_tokens, optimizer, model, original_model,
prompter, add_prompter, prompt_learner, weights, args):
"""
FARE (Embedding Alignment with optional reweighting)
"""
criterion = torch.nn.CrossEntropyLoss().cuda()
if args.add_prompt_size == 0:
prompt_token = None
else:
prompt_token = add_prompter()
    # Same clean-sample forward pass as in FT_FARE_loss.
nat_img = clip_img_preprocessing(images)
prompted_nat_images = prompter(nat_img)
with torch.no_grad():
Ori_output_Inat_Tnat, _, Ori_emb_Inat_Tnat, _ = multiGPU_CLIP(original_model,
prompted_nat_images,
text_tokens,
prompt_token,
is_embedding=True)
delta = attack_FARE_Emb_L2(prompter, model, add_prompter, criterion, images,
target, text_tokens, args.train_stepsize, args.train_numsteps,
'l_inf', Ori_emb_Inat_Tnat, epsilon=args.train_eps)
    # Same adversarial-example construction as in FT_FARE_loss.
adv_img = clip_img_preprocessing(images + delta)
prompted_adv_images = prompter(adv_img)
    # Same feature extraction as in FT_FARE_loss, except the adversarial
    # embeddings are always computed (needed for the per-sample losses below).
if args.mul_noise_beta > 0.0:
output_Iadv_Tnat, _ = multiGPU_CLIP_multiply_noise(model, prompted_adv_images,
text_tokens, prompt_token,
beta=args.mul_noise_beta)
_, _, emb_Iadv_Tnat, _ = multiGPU_CLIP(model, prompted_adv_images, text_tokens,
prompt_token, is_embedding=True)
else:
output_Iadv_Tnat, _, emb_Iadv_Tnat, _ = multiGPU_CLIP(model, prompted_adv_images,
text_tokens, prompt_token,
is_embedding=True)
    # Per-sample squared L2 distance to the frozen model's embeddings.
    sample_losses = torch.sum((emb_Iadv_Tnat - Ori_emb_Inat_Tnat) ** 2, dim=1)
    # Modification: optional per-sample reweighting.
    if weights is not None:
        if isinstance(weights, np.ndarray):
            weights = torch.tensor(weights, dtype=torch.float32)  # numpy -> tensor
        weights = weights.to(images.device).view(-1)  # flatten to (batch_size,)
        # The weights must line up one-to-one with the per-sample losses.
        assert weights.shape == sample_losses.shape, \
            f"Shape mismatch: weights {weights.shape}, sample_losses {sample_losses.shape}"
        # Weighted loss, averaged over the batch. Alternatives kept from the
        # original code: a plain sum, or normalizing the weights by their mean.
        # loss = (sample_losses * weights).sum()
        loss = (sample_losses * weights).mean()
        # loss = (sample_losses * (weights / weights.mean())).mean()
else:
loss = criterion_L2(emb_Iadv_Tnat, Ori_emb_Inat_Tnat)
return loss, output_Iadv_Tnat
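# --- Usage sketch for the weighted variant (hypothetical) ---
# `weights` may be None, a numpy array, or a 1-D tensor with one entry per
# sample; a uniform baseline reduces to the unweighted FARE loss:
#
#     weights = np.ones(images.size(0), dtype=np.float32)
#     loss, output = FT_FARE_loss_weighted(images, target, text_tokens,
#                                          optimizer, model, original_model,
#                                          prompter, add_prompter,
#                                          prompt_learner, weights, args)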