jeffliulab commited on
Commit
216e171
·
verified ·
1 Parent(s): d79ceaf

Fix README metadata + initial deploy

Browse files
Files changed (6) hide show
  1. README.md +18 -6
  2. app.py +209 -0
  3. examples/car.jpg +0 -0
  4. examples/cat.jpg +0 -0
  5. examples/dog.jpg +0 -0
  6. requirements.txt +5 -0
README.md CHANGED
@@ -1,12 +1,24 @@
1
  ---
2
- title: Adversarial Attack
3
- emoji: 📚
4
- colorFrom: purple
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.11.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Adversarial Attack Demo
3
+ emoji: "\U0001F6E1\uFE0F"
4
+ colorFrom: red
5
+ colorTo: yellow
6
  sdk: gradio
7
+ sdk_version: "5.29.0"
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ # Adversarial Attack Demo | FGSM & PGD
14
+
15
+ Upload an image and watch how small, imperceptible perturbations can fool a neural network classifier.
16
+
17
+ **Course**: 215 AI Safety, ch. 1–2
18
+
19
+ ## Features
20
+ - FGSM (Fast Gradient Sign Method) attack
21
+ - PGD (Projected Gradient Descent) iterative attack
22
+ - Side-by-side comparison: original vs perturbation vs adversarial
23
+ - Adjustable epsilon, step size, and iteration count
24
+ - L-inf and L2 perturbation metrics
app.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Adversarial Attack Demo — FGSM & PGD
3
+ Courses: 215 AI Safety ch1-ch2
4
+ """
5
+
6
+ import json
7
+ import numpy as np
8
+ import torch
9
+ import torch.nn.functional as F
10
+ import torchvision.models as models
11
+ import torchvision.transforms as T
12
+ import gradio as gr
13
+ from PIL import Image
14
+
15
# ---------------------------------------------------------------------------
# Model & preprocessing
# ---------------------------------------------------------------------------
device = torch.device("cpu")
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1).eval().to(device)

IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Resize/crop to the canonical 224x224 input but keep pixels in [0, 1]:
# normalization is applied separately so the attacks can work in pixel space.
preprocess = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
])

normalize = T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
# Exact inverse of `normalize`: maps a normalized tensor back to [0, 1] pixels.
inv_normalize = T.Normalize(
    mean=[-m / s for m, s in zip(IMAGENET_MEAN, IMAGENET_STD)],
    std=[1 / s for s in IMAGENET_STD],
)

# Load human-readable ImageNet class labels; fall back to numeric strings so
# the app still starts when the download fails (e.g. offline Space builds).
LABELS_URL = "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json"
try:
    import urllib.request
    # A timeout is essential here: without it an unreachable host blocks
    # module import (and therefore Space startup) indefinitely.
    with urllib.request.urlopen(LABELS_URL, timeout=10) as resp:
        LABELS = json.loads(resp.read().decode())
except Exception:
    LABELS = [str(i) for i in range(1000)]
44
+
45
+
46
def get_top3(logits: torch.Tensor):
    """Return the top-3 (label, probability) pairs for a (1, 1000) logit tensor."""
    probabilities = F.softmax(logits, dim=1)[0]
    values, indices = torch.topk(probabilities, 3)
    result = []
    for prob, idx in zip(values, indices):
        result.append((LABELS[idx], float(prob)))
    return result
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Attack implementations
54
+ # ---------------------------------------------------------------------------
55
def fgsm_attack(img_tensor: torch.Tensor, epsilon: float) -> torch.Tensor:
    """Single-step FGSM (untargeted).

    Args:
        img_tensor: (3, H, W) image with values in [0, 1] (pixel space).
        epsilon: L-inf perturbation budget, measured in pixel space.

    Returns:
        (3, H, W) adversarial image, clamped to [0, 1].
    """
    # Differentiate w.r.t. the *pixel-space* image so epsilon is a true
    # pixel-space L-inf budget. (Stepping in normalized space, as before,
    # scales the perturbation by each channel's std, so the reported L-inf
    # never matched the requested epsilon.)
    pixel = img_tensor.clone().unsqueeze(0).to(device)
    pixel.requires_grad = True
    logits = model(normalize(pixel))
    # Untargeted attack: ascend the loss of the model's own top prediction.
    loss = F.cross_entropy(logits, logits.argmax(1))
    loss.backward()
    adv = pixel + epsilon * pixel.grad.sign()
    # Clamp back to the valid image range.
    return torch.clamp(adv.squeeze(0).detach(), 0, 1)
70
+
71
+
72
def pgd_attack(
    img_tensor: torch.Tensor,
    epsilon: float,
    alpha: float,
    num_steps: int,
) -> torch.Tensor:
    """Multi-step PGD (untargeted).

    Args:
        img_tensor: (3, H, W) image with values in [0, 1] (pixel space).
        epsilon: L-inf projection radius, measured in pixel space.
        alpha: per-step size, measured in pixel space.
        num_steps: number of gradient-ascent iterations.

    Returns:
        (3, H, W) adversarial image within the epsilon-ball, clamped to [0, 1].
    """
    orig = img_tensor.clone()
    perturbed = img_tensor.clone()

    # Fix the attack target at the *clean* prediction. Recomputing
    # argmax(output) every iteration (as before) makes the target move once
    # the label flips, so later steps push away from the wrong class and the
    # attack can oscillate instead of converging.
    with torch.no_grad():
        target = model(normalize(orig.unsqueeze(0)).to(device)).argmax(1)

    for _ in range(num_steps):
        # Differentiate w.r.t. pixel space so alpha/epsilon are true
        # pixel-space quantities (a normalized-space step is scaled by each
        # channel's std and breaks the stated budget).
        pixel = perturbed.clone().unsqueeze(0).to(device)
        pixel.requires_grad = True
        loss = F.cross_entropy(model(normalize(pixel)), target)
        loss.backward()
        adv = pixel + alpha * pixel.grad.sign()
        # Project onto the epsilon-ball around the original, then back into
        # the valid image range.
        delta = torch.clamp(adv.squeeze(0) - orig, -epsilon, epsilon)
        perturbed = torch.clamp(orig + delta, 0, 1).detach()

    return perturbed
97
+
98
+
99
+ # ---------------------------------------------------------------------------
100
+ # Main function
101
+ # ---------------------------------------------------------------------------
102
def attack(
    image: Image.Image,
    method: str,
    epsilon: float,
    pgd_steps: int,
    pgd_alpha: float,
):
    """Run the selected attack on *image* and return the display outputs.

    Returns (original, perturbation-view, adversarial) uint8 images plus a
    markdown metrics table; all-None/empty when no image was supplied.
    """
    if image is None:
        return None, None, None, ""

    def to_uint8(t):
        # (3, H, W) float in [0, 1] -> (H, W, 3) uint8 for display.
        return (t.permute(1, 2, 0).numpy() * 255).astype(np.uint8)

    def predict(t):
        # Forward pass without gradients; top-3 (label, prob) pairs.
        with torch.no_grad():
            return get_top3(model(normalize(t).unsqueeze(0)))

    clean = preprocess(image.convert("RGB"))
    clean_top3 = predict(clean)

    # Run the chosen attack in pixel space.
    if method == "FGSM":
        adversarial = fgsm_attack(clean, epsilon)
    else:
        adversarial = pgd_attack(clean, epsilon, pgd_alpha, pgd_steps)

    adv_top3 = predict(adversarial)

    delta = adversarial - clean
    # Amplify 10x and recenter around 0.5 so the tiny perturbation is visible.
    pert_view = torch.clamp(delta * 10 + 0.5, 0, 1)

    flipped = clean_top3[0][0] != adv_top3[0][0]
    success = "ATTACK SUCCESS" if flipped else "Attack failed (same class)"

    metrics_text = (
        f"**{success}**\n\n"
        f"| Metric | Value |\n|---|---|\n"
        f"| Original Top-1 | {clean_top3[0][0]} ({clean_top3[0][1]:.1%}) |\n"
        f"| Adversarial Top-1 | {adv_top3[0][0]} ({adv_top3[0][1]:.1%}) |\n"
        f"| L-inf | {float(delta.abs().max()):.4f} |\n"
        f"| L2 | {float(delta.norm(2)):.4f} |\n"
        f"| Epsilon | {epsilon} |\n"
        f"| Method | {method} |"
    )

    return to_uint8(clean), to_uint8(pert_view), to_uint8(adversarial), metrics_text
158
+
159
+
160
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks(title="Adversarial Attack Demo") as demo:
    gr.Markdown(
        "# Adversarial Attack Demo | FGSM & PGD\n"
        "Upload an image and see how imperceptible perturbations fool a ResNet-18 classifier.\n"
        "*Course: 215 AI Safety*"
    )

    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Upload Image")
            method = gr.Radio(["FGSM", "PGD"], value="FGSM", label="Attack Method")
            epsilon = gr.Slider(0.0, 0.3, value=0.03, step=0.005, label="Epsilon (perturbation budget)")
            # Start hidden: the default method is FGSM, and toggle_pgd only
            # reveals these controls when "PGD" is selected. (They were
            # previously visible=True, contradicting the toggle's behavior.)
            pgd_steps = gr.Slider(1, 40, value=10, step=1, label="PGD Steps", visible=False)
            pgd_alpha = gr.Slider(0.001, 0.05, value=0.007, step=0.001, label="PGD Step Size", visible=False)
            run_btn = gr.Button("Run Attack", variant="primary")

        with gr.Column(scale=2):
            with gr.Row():
                orig_out = gr.Image(label="Original Image")
                pert_out = gr.Image(label="Perturbation (10x amplified)")
                adv_out = gr.Image(label="Adversarial Image")
            metrics = gr.Markdown(label="Results")

    def toggle_pgd(m):
        # Show the PGD-specific sliders only when the PGD method is chosen.
        visible = m == "PGD"
        return gr.update(visible=visible), gr.update(visible=visible)

    method.change(toggle_pgd, inputs=[method], outputs=[pgd_steps, pgd_alpha])

    run_btn.click(
        fn=attack,
        inputs=[input_image, method, epsilon, pgd_steps, pgd_alpha],
        outputs=[orig_out, pert_out, adv_out, metrics],
    )

    gr.Examples(
        examples=[
            ["examples/cat.jpg", "FGSM", 0.03, 10, 0.007],
            ["examples/dog.jpg", "PGD", 0.02, 20, 0.005],
            ["examples/car.jpg", "FGSM", 0.05, 10, 0.007],
        ],
        inputs=[input_image, method, epsilon, pgd_steps, pgd_alpha],
        label="Try these examples",
    )

if __name__ == "__main__":
    demo.launch()
examples/car.jpg ADDED
examples/cat.jpg ADDED
examples/dog.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=5.0.0
2
+ torch>=2.0.0
3
+ torchvision>=0.15.0
4
+ numpy
5
+ Pillow