lucasddmc commited on
Commit
8780912
·
1 Parent(s): e21e78f

feat: add support for ResNet gradients in the SAGA attack

Browse files
Files changed (2) hide show
  1. app.py +80 -15
  2. utils/attacks.py +116 -8
app.py CHANGED
@@ -3,6 +3,7 @@ import numpy as np
3
  import torch
4
  from PIL import Image
5
  from typing import Optional, List, Tuple
 
6
 
7
  from utils.model_loader import load_model_and_labels
8
  from utils.preprocessing import get_default_transform, preprocess_image
@@ -24,6 +25,7 @@ from utils.visualization import (
24
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25
 
26
  transform = get_default_transform()
 
27
 
28
  def _to_path(file_like: Optional[object]) -> Optional[str]:
29
  """Extrai caminho de um objeto vindo do Gradio File (string, dict com 'name' ou objeto com atributo .name)."""
@@ -204,6 +206,8 @@ def run_attack(
204
  eps: float,
205
  alpha: float,
206
  steps: int,
 
 
207
  ) -> Tuple[List[Image.Image], str, List[List[torch.Tensor]]]:
208
  """
209
  Executa ataque adversarial (FGSM ou PGD) untargeted e extrai atenção.
@@ -213,8 +217,9 @@ def run_attack(
213
  image: imagem PIL
214
  attack_type: "FGSM" (single-step) ou "PGD" (iterativo)
215
  eps: epsilon (perturbação máxima)
216
- alpha: step size (apenas PGD)
217
- steps: número de iterações (apenas PGD)
 
218
  discard_ratio: proporção de atenções fracas a descartar
219
  head_fusion: como agregar heads ('mean', 'max', 'min')
220
  alpha_overlay: transparência da sobreposição
@@ -245,7 +250,16 @@ def run_attack(
245
  if attack_type == "FGSM":
246
  attack = FGSM(model, eps=eps)
247
  elif attack_type == "MIM":
248
- attack = MIFGSM(model, eps=eps, alpha=alpha, steps=steps, decay=1.0)
 
 
 
 
 
 
 
 
 
249
  elif attack_type == "SAGA":
250
  attack = SAGA(model, eps=eps, steps=steps)
251
  else: # PGD
@@ -278,9 +292,9 @@ def run_attack(
278
  elif attack_type == "MIM":
279
  result += f"- Alpha (α): {alpha:.4f}\n"
280
  result += f"- Steps: {steps}\n"
281
- result += f"- Momentum decay: 1.0\n"
282
  result += f"- Normalized gradient with momentum accumulation\n"
283
- elif attack_type == "SAGA":
284
  result += f"- Steps: {steps}\n"
285
  result += f"- Attention-weighted gradient (ViT-specific)\n"
286
  else: # FGSM
@@ -433,10 +447,10 @@ def create_app():
433
  gr.Markdown("#### Attack Configuration")
434
 
435
  attack_type = gr.Dropdown(
436
- choices=["PGD", "FGSM", "MIM", "SAGA"],
437
  value="PGD",
438
  label="Attack Type",
439
- info="PGD/MIM: iterative | FGSM: single-step | SAGA: gradient × attention"
440
  )
441
 
442
  eps_input = gr.Slider(
@@ -467,6 +481,26 @@ def create_app():
467
  step=1,
468
  label="Steps - NNumber of Iterations"
469
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
470
 
471
  with gr.Column():
472
  output_text_attack = gr.Markdown(label="Result")
@@ -475,18 +509,49 @@ def create_app():
475
  def update_attack_params(attack_type):
476
  if attack_type == "FGSM":
477
  # FGSM: não usa alpha nem steps
478
- return gr.update(visible=False), gr.update(visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
479
  elif attack_type == "SAGA":
480
- # SAGA: usa steps mas não usa alpha
481
- return gr.update(visible=False), gr.update(visible=True)
482
- else: # PGD ou MIM
483
- # PGD e MIM: usam alpha e steps
484
- return gr.update(visible=True), gr.update(visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
 
486
  attack_type.change(
487
  fn=update_attack_params,
488
  inputs=[attack_type],
489
- outputs=[alpha_group, steps_group]
490
  )
491
 
492
  # Removido: configuração de rollout da área de ataque
@@ -647,7 +712,7 @@ def create_app():
647
  fn=run_attack,
648
  inputs=[
649
  model_upload_attack, image_upload_attack,
650
- attack_type, eps_input, alpha_input, steps_input
651
  ],
652
  outputs=[iteration_images_state, output_text_attack, cached_attentions_state]
653
  ).then(
 
3
  import torch
4
  from PIL import Image
5
  from typing import Optional, List, Tuple
6
+ from pathlib import Path
7
 
8
  from utils.model_loader import load_model_and_labels
9
  from utils.preprocessing import get_default_transform, preprocess_image
 
25
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
26
 
27
  transform = get_default_transform()
28
+ RESNET_BACKBONE_PATH = Path("models/resnet.pth")
29
 
30
  def _to_path(file_like: Optional[object]) -> Optional[str]:
31
  """Extrai caminho de um objeto vindo do Gradio File (string, dict com 'name' ou objeto com atributo .name)."""
 
206
  eps: float,
207
  alpha: float,
208
  steps: int,
209
+ decay: float,
210
+ vit_weight: float,
211
  ) -> Tuple[List[Image.Image], str, List[List[torch.Tensor]]]:
212
  """
213
  Executa ataque adversarial (FGSM ou PGD) untargeted e extrai atenção.
 
217
  image: imagem PIL
218
  attack_type: "FGSM" (single-step) ou "PGD" (iterativo)
219
  eps: epsilon (perturbação máxima)
220
+ alpha: step size (apenas PGD/MIM)
221
+ steps: número de iterações (iterativos)
222
+ decay: momentum decay (apenas MIM)
223
  discard_ratio: proporção de atenções fracas a descartar
224
  head_fusion: como agregar heads ('mean', 'max', 'min')
225
  alpha_overlay: transparência da sobreposição
 
250
  if attack_type == "FGSM":
251
  attack = FGSM(model, eps=eps)
252
  elif attack_type == "MIM":
253
+ attack = MIFGSM(model, eps=eps, alpha=alpha, steps=steps, decay=decay)
254
+ elif attack_type == "SAGA (with CNN gradient)":
255
+ attack = SAGA(
256
+ model,
257
+ eps=eps,
258
+ steps=steps,
259
+ use_resnet=True,
260
+ vit_weight=vit_weight,
261
+ cnn_checkpoint_path=str(RESNET_BACKBONE_PATH)
262
+ )
263
  elif attack_type == "SAGA":
264
  attack = SAGA(model, eps=eps, steps=steps)
265
  else: # PGD
 
292
  elif attack_type == "MIM":
293
  result += f"- Alpha (α): {alpha:.4f}\n"
294
  result += f"- Steps: {steps}\n"
295
+ result += f"- Momentum decay: {decay:.2f}\n"
296
  result += f"- Normalized gradient with momentum accumulation\n"
297
+ elif attack_type in ("SAGA", "SAGA (with CNN gradient)"):
298
  result += f"- Steps: {steps}\n"
299
  result += f"- Attention-weighted gradient (ViT-specific)\n"
300
  else: # FGSM
 
447
  gr.Markdown("#### Attack Configuration")
448
 
449
  attack_type = gr.Dropdown(
450
+ choices=["PGD", "FGSM", "MIM", "SAGA", "SAGA (with CNN gradient)"],
451
  value="PGD",
452
  label="Attack Type",
453
+ info="PGD/MIM: iterative | FGSM: single-step | SAGA variants: gradient × attention"
454
  )
455
 
456
  eps_input = gr.Slider(
 
481
  step=1,
482
  label="Steps - NNumber of Iterations"
483
  )
484
+
485
+ decay_group = gr.Group(visible=False)
486
+ with decay_group:
487
+ decay_input = gr.Slider(
488
+ minimum=0.0,
489
+ maximum=1.0,
490
+ value=1.0,
491
+ step=0.05,
492
+ label="Momentum Decay (MIM only)"
493
+ )
494
+
495
+ vit_weight_slider = gr.Slider(
496
+ minimum=0.0,
497
+ maximum=1.0,
498
+ value=0.5,
499
+ step=0.05,
500
+ label="ViT Gradient Weight",
501
+ info="Blend between ViT attention gradient (1.0) and CNN gradient (0.0)",
502
+ visible=False
503
+ )
504
 
505
  with gr.Column():
506
  output_text_attack = gr.Markdown(label="Result")
 
509
def update_attack_params(attack_type):
    """Toggle visibility of the attack hyper-parameter widgets.

    Returns four gr.update objects, in order: alpha group, steps group,
    momentum-decay group (MIM), and the ViT/CNN gradient-blend slider
    (SAGA with CNN gradient).
    """
    # Visibility per attack type as (alpha, steps, decay, vit_weight).
    # Any type not listed (i.e. PGD) uses alpha + steps only.
    visibility_table = {
        "FGSM": (False, False, False, False),
        "SAGA (with CNN gradient)": (False, True, False, True),
        "SAGA": (False, True, False, False),
        "MIM": (True, True, True, False),
    }
    alpha_vis, steps_vis, decay_vis, blend_vis = visibility_table.get(
        attack_type, (True, True, False, False)
    )
    return (
        gr.update(visible=alpha_vis),
        gr.update(visible=steps_vis),
        gr.update(visible=decay_vis),
        gr.update(visible=blend_vis),
    )
550
 
551
  attack_type.change(
552
  fn=update_attack_params,
553
  inputs=[attack_type],
554
+ outputs=[alpha_group, steps_group, decay_group, vit_weight_slider]
555
  )
556
 
557
  # Removido: configuração de rollout da área de ataque
 
712
  fn=run_attack,
713
  inputs=[
714
  model_upload_attack, image_upload_attack,
715
+ attack_type, eps_input, alpha_input, steps_input, decay_input, vit_weight_slider
716
  ],
717
  outputs=[iteration_images_state, output_text_attack, cached_attentions_state]
718
  ).then(
utils/attacks.py CHANGED
@@ -1,8 +1,20 @@
1
  import torch
2
  import torchattacks
3
  from PIL import Image
4
- from typing import List, Tuple
5
  import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
 
8
  def capture_outputs_and_attentions(model, x_norm: torch.Tensor):
@@ -212,7 +224,7 @@ class PGDIterations(torchattacks.PGD):
212
  outputs0, attentions0 = capture_outputs_and_attentions(self.model, images)
213
  self.attentions_per_iter.append([att for att in attentions0])
214
 
215
- for _ in range(self.steps):
216
  # Normalizar para passar pelo modelo
217
  adv_images = (adv_images_denorm - mean) / std
218
  adv_images.requires_grad = True
@@ -259,7 +271,9 @@ class SAGA(torch.nn.Module):
259
  Paper: "On the Robustness of Vision Transformers to Adversarial Examples" (ICCV 2021)
260
  """
261
 
262
- def __init__(self, model, eps=8/255, steps=10, discard_ratio: float = 0.0, head_fusion: str = "mean"):
 
 
263
  """Implementação correta do SAGA baseada no código original (SelfAttentionGradientAttack).
264
 
265
  Parâmetros:
@@ -268,6 +282,8 @@ class SAGA(torch.nn.Module):
268
  - steps: número de iterações (FGSM iterativo)
269
  - discard_ratio: razão de descarte usada no attention rollout
270
  - head_fusion: estratégia de fusão de heads ('mean','max','min')
 
 
271
  """
272
  super().__init__()
273
  self.model = model
@@ -276,6 +292,10 @@ class SAGA(torch.nn.Module):
276
  self.eps_step = self.eps / max(1, steps)
277
  self.discard_ratio = discard_ratio
278
  self.head_fusion = head_fusion
 
 
 
 
279
  self.device = next(model.parameters()).device
280
  self.iteration_images: List[Image.Image] = []
281
  self.iteration_tensors: List[torch.Tensor] = []
@@ -283,6 +303,7 @@ class SAGA(torch.nn.Module):
283
  # Cache opcional: atenções por camada/head em cada iteração
284
  # Formato: lista por iteração; cada item é a lista de tensores [B, H, T, T] por camada
285
  self.attentions_per_iter: List[List[torch.Tensor]] = []
 
286
 
287
  def _attention_map(self, images_norm: torch.Tensor, save: bool = False) -> torch.Tensor:
288
  """Extrai mapa de atenção (rollout) e retorna tensor expandido [B,3,H,W] em [0,1].
@@ -331,6 +352,82 @@ class SAGA(torch.nn.Module):
331
  attentions = [a.cpu() for a in attentions]
332
  return outputs, attentions
333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  def forward(self, images, labels) -> Tuple[torch.Tensor, List[Image.Image]]:
335
  """Executa o ataque SAGA (FGSM iterativo com ponderação por atenção).
336
 
@@ -374,16 +471,14 @@ class SAGA(torch.nn.Module):
374
  mask0_resized = cv2.resize(mask0, (w, h))
375
  self.attention_masks_cache.append(mask0.copy())
376
 
377
- loss_fn = torch.nn.CrossEntropyLoss()
378
-
379
- for _ in range(self.steps):
380
  # Normalizar para forward
381
  adv_norm = (adv_denorm - mean) / std
382
  adv_norm.requires_grad = True
383
  outputs, attentions = self._capture_outputs_and_attentions(adv_norm)
384
  if isinstance(outputs, tuple): # compatibilidade com modelos que retornam extras
385
  outputs = outputs[0]
386
- loss = loss_fn(outputs, labels)
387
  grad = torch.autograd.grad(loss, adv_norm, retain_graph=False, create_graph=False)[0]
388
 
389
  # Atenção da imagem adversarial atual (já capturada)
@@ -399,8 +494,21 @@ class SAGA(torch.nn.Module):
399
  self.attention_masks_cache.append(mask.copy())
400
  grad_weighted = grad * att_map
401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  # FGSM step em pixel space (sign do gradiente normalizado equivale ao do desnormalizado)
403
- adv_denorm = adv_denorm.detach() + self.eps_step * grad_weighted.sign()
404
 
405
  # Projeção na bola L_inf de raio eps em relação à imagem original
406
  delta = torch.clamp(adv_denorm - images_denorm, min=-self.eps, max=self.eps)
 
1
  import torch
2
  import torchattacks
3
  from PIL import Image
4
+ from typing import List, Tuple, Optional
5
  import numpy as np
6
+ import warnings
7
+ from pathlib import Path
8
+
9
+ try:
10
+ import torchvision.models as tv_models
11
+ except Exception: # pragma: no cover - torchvision is optional for ViT-only mode
12
+ tv_models = None
13
+
14
+ try:
15
+ import timm
16
+ except Exception: # pragma: no cover - timm is optional for CNN blending
17
+ timm = None
18
 
19
 
20
  def capture_outputs_and_attentions(model, x_norm: torch.Tensor):
 
224
  outputs0, attentions0 = capture_outputs_and_attentions(self.model, images)
225
  self.attentions_per_iter.append([att for att in attentions0])
226
 
227
+ for step_idx in range(self.steps):
228
  # Normalizar para passar pelo modelo
229
  adv_images = (adv_images_denorm - mean) / std
230
  adv_images.requires_grad = True
 
271
  Paper: "On the Robustness of Vision Transformers to Adversarial Examples" (ICCV 2021)
272
  """
273
 
274
+ def __init__(self, model, eps=8/255, steps=10, discard_ratio: float = 0.0,
275
+ head_fusion: str = "mean", use_resnet: bool = False,
276
+ cnn_checkpoint_path: str = "resnet.pth", vit_weight=0.5):
277
  """Implementação correta do SAGA baseada no código original (SelfAttentionGradientAttack).
278
 
279
  Parâmetros:
 
282
  - steps: número de iterações (FGSM iterativo)
283
  - discard_ratio: razão de descarte usada no attention rollout
284
  - head_fusion: estratégia de fusão de heads ('mean','max','min')
285
+ - use_resnet: se True, acumula gradiente de um backbone CNN externo e o mistura ao gradiente ponderado pela atenção
286
+ - cnn_checkpoint_path: caminho padrão do backbone CNN auxiliar (será carregado sob demanda)
287
  """
288
  super().__init__()
289
  self.model = model
 
292
  self.eps_step = self.eps / max(1, steps)
293
  self.discard_ratio = discard_ratio
294
  self.head_fusion = head_fusion
295
+ self.use_resnet = use_resnet
296
+ self.cnn_checkpoint_path = Path(cnn_checkpoint_path)
297
+ self.cnn_model: Optional[torch.nn.Module] = None
298
+ self.vit_weight = vit_weight
299
  self.device = next(model.parameters()).device
300
  self.iteration_images: List[Image.Image] = []
301
  self.iteration_tensors: List[torch.Tensor] = []
 
303
  # Cache opcional: atenções por camada/head em cada iteração
304
  # Formato: lista por iteração; cada item é a lista de tensores [B, H, T, T] por camada
305
  self.attentions_per_iter: List[List[torch.Tensor]] = []
306
+ self.loss_fn = torch.nn.CrossEntropyLoss()
307
 
308
  def _attention_map(self, images_norm: torch.Tensor, save: bool = False) -> torch.Tensor:
309
  """Extrai mapa de atenção (rollout) e retorna tensor expandido [B,3,H,W] em [0,1].
 
352
  attentions = [a.cpu() for a in attentions]
353
  return outputs, attentions
354
 
355
def _load_cnn_backbone(self) -> Optional[torch.nn.Module]:
    """Lazily load the auxiliary CNN backbone used when ``use_resnet=True``.

    Resolution order:
      1. the cached ``self.cnn_model`` (loaded at most once per instance);
      2. the checkpoint at ``self.cnn_checkpoint_path`` — either a pickled
         ``nn.Module`` or a state dict (BiT/ResNetV2-style ``stem.*`` keys
         go to timm, anything else to a torchvision ResNet-101);
      3. pretrained fallback weights from timm, then torchvision.

    Returns None (after a warning) when CNN mode is disabled or no backbone
    can be constructed, so SAGA degrades to pure ViT gradients.
    """
    if not self.use_resnet:
        return None
    if self.cnn_model is not None:
        return self.cnn_model
    if tv_models is None:
        warnings.warn("torchvision não disponível; desabilitando modo CNN do SAGA.")
        return None

    model: Optional[torch.nn.Module] = None
    checkpoint_model_name = "resnetv2_101x1_bit.goog_in21k_ft_in1k"
    if self.cnn_checkpoint_path and self.cnn_checkpoint_path.exists():
        try:
            # SECURITY NOTE: torch.load unpickles arbitrary objects — only
            # load checkpoints from trusted sources.
            checkpoint = torch.load(self.cnn_checkpoint_path, map_location=self.device)
            if isinstance(checkpoint, torch.nn.Module):
                model = checkpoint
            elif isinstance(checkpoint, dict):
                state_dict = checkpoint.get('model_state_dict') or checkpoint.get('state_dict') or checkpoint
                if timm is not None and any(key.startswith("stem.") for key in state_dict.keys()):
                    # BiT/ResNetV2-style (timm) checkpoints start with "stem." keys.
                    num_classes = None
                    head_bias = state_dict.get('head.fc.bias')
                    if isinstance(head_bias, torch.Tensor):
                        num_classes = head_bias.shape[0]
                    model = timm.create_model(
                        checkpoint.get("model_name", checkpoint_model_name),
                        pretrained=False,
                        num_classes=num_classes or 1000
                    )
                    load_result = model.load_state_dict(state_dict, strict=False)
                else:
                    model = tv_models.resnet101(weights=None)
                    load_result = model.load_state_dict(state_dict, strict=False)
                missing = load_result.missing_keys
                unexpected = load_result.unexpected_keys
                if missing or unexpected:
                    warn_msg = "[SAGA] ResNet checkpoint keys mismatch."
                    if missing:
                        warn_msg += f" Missing: {missing[:5]}{'...' if len(missing) > 5 else ''}."
                    if unexpected:
                        warn_msg += f" Unexpected: {unexpected[:5]}{'...' if len(unexpected) > 5 else ''}."
                    warnings.warn(warn_msg + " Using available weights (strict=False).")
            else:
                warnings.warn(f"Formato de checkpoint desconhecido em {self.cnn_checkpoint_path}; utilizando pesos padrão.")
        except Exception as exc:  # pragma: no cover - fallback resiliente
            warnings.warn(f"Falha ao carregar {self.cnn_checkpoint_path}: {exc}. Usando ResNet padrão.")

    if model is None:
        if timm is not None:
            try:
                model = timm.create_model(checkpoint_model_name, pretrained=True)
            except Exception:
                model = None
        if model is None and tv_models is not None:
            try:
                model = tv_models.resnet101(weights="IMAGENET1K_V2")
            except Exception:
                try:
                    # Legacy torchvision (<0.13) API; removed in newer versions.
                    model = tv_models.resnet101(pretrained=True)
                except Exception:
                    model = None

    # FIX: the original called model.to(...) unconditionally; when every
    # fallback failed (timm missing/failing and both torchvision constructor
    # calls raising), this crashed with AttributeError instead of degrading
    # gracefully to ViT-only gradients like the other failure paths.
    if model is None:
        warnings.warn("[SAGA] Nenhum backbone CNN disponível; usando apenas gradiente ViT.")
        return None

    model = model.to(self.device)
    model.eval()
    self.cnn_model = model
    return self.cnn_model
418
+
419
+ def _compute_cnn_gradient(self, images_norm: torch.Tensor, labels: torch.Tensor) -> Optional[torch.Tensor]:
420
+ """Obtém gradientes do backbone CNN auxiliar para a mesma imagem normalizada."""
421
+ cnn_model = self._load_cnn_backbone()
422
+ if cnn_model is None:
423
+ return None
424
+
425
+ cnn_input = images_norm.detach().clone().requires_grad_(True)
426
+ outputs = cnn_model(cnn_input)
427
+ loss = self.loss_fn(outputs, labels)
428
+ grad = torch.autograd.grad(loss, cnn_input, retain_graph=False, create_graph=False)[0]
429
+ return grad
430
+
431
  def forward(self, images, labels) -> Tuple[torch.Tensor, List[Image.Image]]:
432
  """Executa o ataque SAGA (FGSM iterativo com ponderação por atenção).
433
 
 
471
  mask0_resized = cv2.resize(mask0, (w, h))
472
  self.attention_masks_cache.append(mask0.copy())
473
 
474
+ for step_idx in range(self.steps):
 
 
475
  # Normalizar para forward
476
  adv_norm = (adv_denorm - mean) / std
477
  adv_norm.requires_grad = True
478
  outputs, attentions = self._capture_outputs_and_attentions(adv_norm)
479
  if isinstance(outputs, tuple): # compatibilidade com modelos que retornam extras
480
  outputs = outputs[0]
481
+ loss = self.loss_fn(outputs, labels)
482
  grad = torch.autograd.grad(loss, adv_norm, retain_graph=False, create_graph=False)[0]
483
 
484
  # Atenção da imagem adversarial atual (já capturada)
 
494
  self.attention_masks_cache.append(mask.copy())
495
  grad_weighted = grad * att_map
496
 
497
+ grad_final = grad_weighted
498
+ if self.use_resnet:
499
+ cnn_grad = self._compute_cnn_gradient(adv_norm, labels)
500
+ if cnn_grad is not None:
501
+ vit_contrib = grad_weighted.detach().abs().mean().item()
502
+ cnn_contrib = cnn_grad.detach().abs().mean().item()
503
+ grad_final = self.vit_weight * grad_weighted + (1 - self.vit_weight) * cnn_grad
504
+ blended_contrib = grad_final.detach().abs().mean().item()
505
+ print(
506
+ f"[SAGA][step {step_idx+1}/{self.steps}] vit_weight={self.vit_weight:.2f} "
507
+ f"|ViT|={vit_contrib:.4e} |CNN|={cnn_contrib:.4e} |blend|={blended_contrib:.4e}"
508
+ )
509
+
510
  # FGSM step em pixel space (sign do gradiente normalizado equivale ao do desnormalizado)
511
+ adv_denorm = adv_denorm.detach() + self.eps_step * grad_final.sign()
512
 
513
  # Projeção na bola L_inf de raio eps em relação à imagem original
514
  delta = torch.clamp(adv_denorm - images_denorm, min=-self.eps, max=self.eps)