ttoosi committed on
Commit
8a7ee57
·
1 Parent(s): 75fe650

Keep only three examples; ArtGallery1 first

Browse files
Files changed (1) hide show
  1. app.py +22 -276
app.py CHANGED
@@ -41,44 +41,9 @@ model = GenerativeInferenceModel()
41
 
42
  # Define example images and their parameters with updated values from the research
43
  examples = [
44
- {
45
- "image": os.path.join("stimuli", "farm1.jpg"),
46
- "name": "farm1",
47
- "wiki": "https://en.wikipedia.org/wiki/Visual_perception",
48
- "papers": [
49
- "[Adversarially Robust Vision](https://github.com/MadryLab/robustness)",
50
- "[Generative Inference](https://doi.org/10.1016/j.tics.2003.08.003)"
51
- ],
52
- "method": "Prior-Guided Drift Diffusion",
53
- "reverse_diff": {
54
- "model": "resnet50_robust",
55
- "layer": "all",
56
- "initial_noise": 0.0,
57
- "diffusion_noise": 0.02,
58
- "step_size": 1.0,
59
- "iterations": 501,
60
- "epsilon": 40.0
61
- },
62
- "inference_normalization": "off",
63
- "use_adaptive_eps": False,
64
- "use_adaptive_step": False,
65
- "mask_center_x": 0.0,
66
- "mask_center_y": 0.0,
67
- "mask_radius": 0.2,
68
- "mask_sigma": 0.3,
69
- "eps_max_mult": 300.0,
70
- "eps_min_mult": 1.0,
71
- "step_max_mult": 10.0,
72
- "step_min_mult": 1.0,
73
- },
74
  {
75
  "image": os.path.join("stimuli", "ArtGallery1.jpg"),
76
  "name": "ArtGallery1",
77
- "wiki": "https://en.wikipedia.org/wiki/Visual_perception",
78
- "papers": [
79
- "[Adversarially Robust Vision](https://github.com/MadryLab/robustness)",
80
- "[Generative Inference](https://doi.org/10.1016/j.tics.2003.08.003)"
81
- ],
82
  "method": "Prior-Guided Drift Diffusion",
83
  "reverse_diff": {
84
  "model": "resnet50_robust",
@@ -102,268 +67,55 @@ examples = [
102
  "step_min_mult": 1.0,
103
  },
104
  {
105
- "image": os.path.join("stimuli", "urbanoffice1.jpg"),
106
- "name": "UrbanOffice1",
107
- "wiki": "https://en.wikipedia.org/wiki/Visual_perception",
108
- "papers": [
109
- "[Adversarially Robust Vision](https://github.com/MadryLab/robustness)",
110
- "[Generative Inference](https://doi.org/10.1016/j.tics.2003.08.003)"
111
- ],
112
  "method": "Prior-Guided Drift Diffusion",
113
  "reverse_diff": {
114
  "model": "resnet50_robust",
115
  "layer": "all",
116
- "initial_noise": 1.0,
117
- "diffusion_noise": 0.002,
118
  "step_size": 1.0,
119
- "iterations": 500,
120
  "epsilon": 40.0
121
  },
122
  "inference_normalization": "off",
123
  "use_adaptive_eps": False,
124
- "use_adaptive_step": True,
125
- "mask_center_x": 0.5,
126
- "mask_center_y": 0.0,
127
- "mask_radius": 0.2,
128
- "mask_sigma": 0.2,
129
- "eps_max_mult": 20.0,
130
- "eps_min_mult": 1.0,
131
- "step_max_mult": 50.0,
132
- "step_min_mult": 0.2,
133
- },
134
- {
135
- "image": os.path.join("stimuli", "Neon_Color_Circle.jpg"),
136
- "name": "Neon Color Spreading",
137
- "wiki": "https://en.wikipedia.org/wiki/Neon_color_spreading",
138
- "papers": [
139
- "[Color Assimilation](https://doi.org/10.1016/j.visres.2000.200.1)",
140
- "[Perceptual Filling-in](https://doi.org/10.1016/j.tics.2003.08.003)"
141
- ],
142
- "method": "Prior-Guided Drift Diffusion",
143
- "reverse_diff": {
144
- "model": "resnet50_robust",
145
- "layer": "layer3",
146
- "initial_noise": 0.8,
147
- "diffusion_noise": 0.003,
148
- "step_size": 1.0,
149
- "iterations": 101,
150
- "epsilon": 20.0
151
- },
152
- "use_adaptive_eps": False,
153
  "use_adaptive_step": False,
154
  "mask_center_x": 0.0,
155
  "mask_center_y": 0.0,
156
  "mask_radius": 0.2,
157
- "mask_sigma": 1.0,
158
- "eps_max_mult": 1.0,
159
  "eps_min_mult": 1.0,
160
- "step_max_mult": 1.0,
161
  "step_min_mult": 1.0,
162
  },
163
  {
164
- "image": os.path.join("stimuli", "Kanizsa_square.jpg"),
165
- "name": "Kanizsa Square",
166
- "wiki": "https://en.wikipedia.org/wiki/Kanizsa_triangle",
167
- "papers": [
168
- "[Gestalt Psychology](https://en.wikipedia.org/wiki/Gestalt_psychology)",
169
- "[Neural Mechanisms](https://doi.org/10.1016/j.tics.2003.08.003)"
170
- ],
171
  "method": "Prior-Guided Drift Diffusion",
172
  "reverse_diff": {
173
  "model": "resnet50_robust",
174
  "layer": "all",
175
- "initial_noise": 0.0,
176
- "diffusion_noise": 0.005,
177
- "step_size": 0.64,
178
- "iterations": 100,
179
- "epsilon": 5.0
180
- },
181
- "use_adaptive_eps": False,
182
- "use_adaptive_step": False,
183
- "mask_center_x": 0.0,
184
- "mask_center_y": 0.0,
185
- "mask_radius": 0.2,
186
- "mask_sigma": 1.0,
187
- "eps_max_mult": 1.0,
188
- "eps_min_mult": 1.0,
189
- "step_max_mult": 1.0,
190
- "step_min_mult": 1.0,
191
- },
192
- {
193
- "image": os.path.join("stimuli", "CornsweetBlock.png"),
194
- "name": "Cornsweet Illusion",
195
- "wiki": "https://en.wikipedia.org/wiki/Cornsweet_illusion",
196
- "papers": [
197
- "[Brightness Perception](https://doi.org/10.1016/j.visres.2000.200.1)",
198
- "[Edge Effects](https://doi.org/10.1016/j.tics.2003.08.003)"
199
- ],
200
- "instructions": "Both blocks are gray in color (the same), use your finger to cover the middle line. Hit 'Load Parameters' and then hit 'Run Generative Inference' to see how the model sees the blocks.",
201
- "method": "Prior-Guided Drift Diffusion",
202
- "reverse_diff": {
203
- "model": "resnet50_robust",
204
- "layer": "layer3",
205
- "initial_noise": 0.5,
206
- "diffusion_noise": 0.005,
207
- "step_size": 0.8,
208
- "iterations": 51,
209
- "epsilon": 20.0
210
- },
211
- "use_adaptive_eps": False,
212
- "use_adaptive_step": False,
213
- "mask_center_x": 0.0,
214
- "mask_center_y": 0.0,
215
- "mask_radius": 0.2,
216
- "mask_sigma": 1.0,
217
- "eps_max_mult": 1.0,
218
- "eps_min_mult": 1.0,
219
- "step_max_mult": 1.0,
220
- "step_min_mult": 1.0,
221
- },
222
- {
223
- "image": os.path.join("stimuli", "face_vase.png"),
224
- "name": "Rubin's Face-Vase (Object Prior)",
225
- "wiki": "https://en.wikipedia.org/wiki/Rubin_vase",
226
- "papers": [
227
- "[Figure-Ground Perception](https://en.wikipedia.org/wiki/Figure-ground_(perception))",
228
- "[Bistable Perception](https://doi.org/10.1016/j.tics.2003.08.003)"
229
- ],
230
- "method": "Prior-Guided Drift Diffusion",
231
- "reverse_diff": {
232
- "model": "resnet50_robust",
233
- "layer": "avgpool",
234
- "initial_noise": 0.9,
235
- "diffusion_noise": 0.003,
236
- "step_size": 0.58,
237
- "iterations": 100,
238
- "epsilon": 0.81
239
- },
240
- "use_adaptive_eps": False,
241
- "use_adaptive_step": False,
242
- "mask_center_x": 0.0,
243
- "mask_center_y": 0.0,
244
- "mask_radius": 0.2,
245
- "mask_sigma": 1.0,
246
- "eps_max_mult": 1.0,
247
- "eps_min_mult": 1.0,
248
- "step_max_mult": 1.0,
249
- "step_min_mult": 1.0,
250
- },
251
- {
252
- "image": os.path.join("stimuli", "Confetti_illusion.png"),
253
- "name": "Confetti Illusion",
254
- "wiki": "https://www.youtube.com/watch?v=SvEiEi8O7QE",
255
- "papers": [
256
- "[Color Perception](https://doi.org/10.1016/j.visres.2000.200.1)",
257
- "[Context Effects](https://doi.org/10.1016/j.tics.2003.08.003)"
258
- ],
259
- "method": "Prior-Guided Drift Diffusion",
260
- "reverse_diff": {
261
- "model": "resnet50_robust",
262
- "layer": "layer3",
263
- "initial_noise": 0.1,
264
- "diffusion_noise": 0.003,
265
- "step_size": 0.5,
266
- "iterations": 101,
267
- "epsilon": 20.0
268
- },
269
- "use_adaptive_eps": False,
270
- "use_adaptive_step": False,
271
- "mask_center_x": 0.0,
272
- "mask_center_y": 0.0,
273
- "mask_radius": 0.2,
274
- "mask_sigma": 1.0,
275
- "eps_max_mult": 1.0,
276
- "eps_min_mult": 1.0,
277
- "step_max_mult": 1.0,
278
- "step_min_mult": 1.0,
279
- },
280
- {
281
- "image": os.path.join("stimuli", "EhresteinSingleColor.png"),
282
- "name": "Ehrenstein Illusion",
283
- "wiki": "https://en.wikipedia.org/wiki/Ehrenstein_illusion",
284
- "papers": [
285
- "[Subjective Contours](https://doi.org/10.1016/j.visres.2000.200.1)",
286
- "[Neural Processing](https://doi.org/10.1016/j.tics.2003.08.003)"
287
- ],
288
- "method": "Prior-Guided Drift Diffusion",
289
- "reverse_diff": {
290
- "model": "resnet50_robust",
291
- "layer": "layer3",
292
- "initial_noise": 0.5,
293
- "diffusion_noise": 0.005,
294
- "step_size": 0.8,
295
- "iterations": 101,
296
- "epsilon": 20.0
297
- },
298
- "use_adaptive_eps": False,
299
- "use_adaptive_step": False,
300
- "mask_center_x": 0.0,
301
- "mask_center_y": 0.0,
302
- "mask_radius": 0.2,
303
- "mask_sigma": 1.0,
304
- "eps_max_mult": 1.0,
305
- "eps_min_mult": 1.0,
306
- "step_max_mult": 1.0,
307
- "step_min_mult": 1.0,
308
- },
309
- {
310
- "image": os.path.join("stimuli", "GroupingByContinuity.png"),
311
- "name": "Grouping by Continuity",
312
- "wiki": "https://en.wikipedia.org/wiki/Principles_of_grouping",
313
- "papers": [
314
- "[Gestalt Principles](https://en.wikipedia.org/wiki/Gestalt_psychology)",
315
- "[Visual Organization](https://doi.org/10.1016/j.tics.2003.08.003)"
316
- ],
317
- "method": "Prior-Guided Drift Diffusion",
318
- "reverse_diff": {
319
- "model": "resnet50_robust",
320
- "layer": "layer3",
321
- "initial_noise": 0.0,
322
- "diffusion_noise": 0.005,
323
- "step_size": 0.4,
324
- "iterations": 101,
325
- "epsilon": 4.0
326
  },
 
327
  "use_adaptive_eps": False,
328
- "use_adaptive_step": False,
329
- "mask_center_x": 0.0,
330
  "mask_center_y": 0.0,
331
  "mask_radius": 0.2,
332
- "mask_sigma": 1.0,
333
- "eps_max_mult": 1.0,
334
  "eps_min_mult": 1.0,
335
- "step_max_mult": 1.0,
336
- "step_min_mult": 1.0,
337
  },
338
- {
339
- "image": os.path.join("stimuli", "figure_ground.png"),
340
- "name": "Figure-Ground Illusion",
341
- "wiki": "https://en.wikipedia.org/wiki/Figure-ground_(perception)",
342
- "papers": [
343
- "[Gestalt Principles](https://en.wikipedia.org/wiki/Gestalt_psychology)",
344
- "[Perceptual Organization](https://doi.org/10.1016/j.tics.2003.08.003)"
345
- ],
346
- "method": "Prior-Guided Drift Diffusion",
347
- "reverse_diff": {
348
- "model": "resnet50_robust",
349
- "layer": "layer3",
350
- "initial_noise": 0.1,
351
- "diffusion_noise": 0.003,
352
- "step_size": 0.5,
353
- "iterations": 101,
354
- "epsilon": 3.0
355
- },
356
- "use_adaptive_eps": False,
357
- "use_adaptive_step": False,
358
- "mask_center_x": 0.0,
359
- "mask_center_y": 0.0,
360
- "mask_radius": 0.2,
361
- "mask_sigma": 1.0,
362
- "eps_max_mult": 1.0,
363
- "eps_min_mult": 1.0,
364
- "step_max_mult": 1.0,
365
- "step_min_mult": 1.0,
366
- }
367
  ]
368
 
369
  def _input_image_stem(image):
@@ -770,12 +522,6 @@ with gr.Blocks(title="Human Hallucination Prediction", css="""
770
  # Right column for the explanation
771
  with gr.Column(scale=2):
772
  gr.Markdown(f"### {ex['name']}")
773
- if ex["name"] not in ("farm1", "ArtGallery1", "UrbanOffice1"):
774
- gr.Markdown(f"[Read more on Wikipedia]({ex['wiki']})")
775
-
776
- # Show instructions if they exist
777
- if "instructions" in ex:
778
- gr.Markdown(f"**Instructions:** {ex['instructions']}")
779
 
780
 
781
  if i < len(examples) - 1: # Don't add separator after the last example
 
41
 
42
  # Define example images and their parameters with updated values from the research
43
  examples = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  {
45
  "image": os.path.join("stimuli", "ArtGallery1.jpg"),
46
  "name": "ArtGallery1",
 
 
 
 
 
47
  "method": "Prior-Guided Drift Diffusion",
48
  "reverse_diff": {
49
  "model": "resnet50_robust",
 
67
  "step_min_mult": 1.0,
68
  },
69
  {
70
+ "image": os.path.join("stimuli", "farm1.jpg"),
71
+ "name": "farm1",
 
 
 
 
 
72
  "method": "Prior-Guided Drift Diffusion",
73
  "reverse_diff": {
74
  "model": "resnet50_robust",
75
  "layer": "all",
76
+ "initial_noise": 0.0,
77
+ "diffusion_noise": 0.02,
78
  "step_size": 1.0,
79
+ "iterations": 501,
80
  "epsilon": 40.0
81
  },
82
  "inference_normalization": "off",
83
  "use_adaptive_eps": False,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  "use_adaptive_step": False,
85
  "mask_center_x": 0.0,
86
  "mask_center_y": 0.0,
87
  "mask_radius": 0.2,
88
+ "mask_sigma": 0.3,
89
+ "eps_max_mult": 300.0,
90
  "eps_min_mult": 1.0,
91
+ "step_max_mult": 10.0,
92
  "step_min_mult": 1.0,
93
  },
94
  {
95
+ "image": os.path.join("stimuli", "urbanoffice1.jpg"),
96
+ "name": "UrbanOffice1",
 
 
 
 
 
97
  "method": "Prior-Guided Drift Diffusion",
98
  "reverse_diff": {
99
  "model": "resnet50_robust",
100
  "layer": "all",
101
+ "initial_noise": 1.0,
102
+ "diffusion_noise": 0.002,
103
+ "step_size": 1.0,
104
+ "iterations": 500,
105
+ "epsilon": 40.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  },
107
+ "inference_normalization": "off",
108
  "use_adaptive_eps": False,
109
+ "use_adaptive_step": True,
110
+ "mask_center_x": 0.5,
111
  "mask_center_y": 0.0,
112
  "mask_radius": 0.2,
113
+ "mask_sigma": 0.2,
114
+ "eps_max_mult": 20.0,
115
  "eps_min_mult": 1.0,
116
+ "step_max_mult": 50.0,
117
+ "step_min_mult": 0.2,
118
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  ]
120
 
121
  def _input_image_stem(image):
 
522
  # Right column for the explanation
523
  with gr.Column(scale=2):
524
  gr.Markdown(f"### {ex['name']}")
 
 
 
 
 
 
525
 
526
 
527
  if i < len(examples) - 1: # Don't add separator after the last example