aryashah00 commited on
Commit
fca32ef
·
verified ·
1 Parent(s): 48db1a1

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +467 -0
app.py ADDED
@@ -0,0 +1,467 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GF-Score: Fairness-Aware Robustness Auditing Dashboard
3
+ =======================================================
4
+ Hugging Face Spaces entry point.
5
+
6
+ Loads pre-computed evaluation results (no model inference required)
7
+ and serves an interactive Gradio dashboard.
8
+ """
9
+
10
+ import sys
11
+ import json
12
+ import logging
13
+ from pathlib import Path
14
+ from datetime import datetime
15
+
16
# Put the directory that holds this file at the front of sys.path so the
# gf_score package is importable.
ROOT = Path(__file__).parent.resolve()
sys.path.insert(0, str(ROOT))

import gradio as gr

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("gf_score.hf_app")

# ---------------------------------------------------------------------------
# Paths — resolved relative to repo root (works both locally and on HF)
# ---------------------------------------------------------------------------
RESULTS_DIR = ROOT.joinpath("outputs", "results")
REPORTS_DIR = RESULTS_DIR.joinpath("reports")
# Created eagerly at import time; parents are created as needed.
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Model short-name mappings (copied from config to keep this file standalone)
34
+ # ---------------------------------------------------------------------------
35
# Full model identifier -> compact display label, CIFAR-10 models.
CIFAR10_SHORT_NAMES = {
    "Augustin2020Adversarial_34_10_extra": "Augustin_WRN_extra",
    "Augustin2020Adversarial_34_10": "Augustin_WRN",
    "Augustin2020Adversarial": "Augustin2020",
    "Ding2020MMA": "Ding_MMA",
    "Engstrom2019Robustness": "Engstrom2019",
    "Gowal2020Uncovering": "Gowal2020",
    "Gowal2020Uncovering_extra": "Gowal_extra",
    "Rade2021Helper_R18_ddpm": "Rade_R18",
    "Rebuffi2021Fixing_28_10_cutmix_ddpm": "Rebuffi_28_ddpm",
    "Rebuffi2021Fixing_70_16_cutmix_ddpm": "Rebuffi_70_ddpm",
    "Rebuffi2021Fixing_70_16_cutmix_extra": "Rebuffi_extra",
    "Rebuffi2021Fixing_R18_cutmix_ddpm": "Rebuffi_R18",
    "Rice2020Overfitting": "Rice2020",
    "Rony2019Decoupling": "Rony2019",
    "Sehwag2021Proxy": "Sehwag_Proxy",
    "Sehwag2021Proxy_R18": "Sehwag_R18",
    "Wu2020Adversarial": "Wu2020",
}

# Full model identifier -> compact display label, ImageNet models.
IMAGENET_SHORT_NAMES = {
    "Salman2020Do_50_2": "Salman_WRN50-2",
    "Salman2020Do_R50": "Salman_R50",
    "Engstrom2019Robustness": "Engstrom2019",
    "Wong2020Fast": "Wong2020",
    "Salman2020Do_R18": "Salman_R18",
}

# Dataset key -> that dataset's short-name table.
SHORT_NAMES = dict(cifar10=CIFAR10_SHORT_NAMES, imagenet=IMAGENET_SHORT_NAMES)
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Data loading
67
+ # ---------------------------------------------------------------------------
68
+
69
def load_results(dataset: str):
    """Load the pre-computed evaluation results for ``dataset``.

    CIFAR-10 results are read from ``full_results.json``; every other dataset
    is read from ``full_results_<dataset>.json``. Both are looked up under
    ``RESULTS_DIR``.

    Args:
        dataset: Dataset key, e.g. ``"cifar10"`` or ``"imagenet"``.

    Returns:
        The decoded JSON document, or ``None`` (after logging a warning) when
        the results file does not exist.
    """
    suffix = "" if dataset == "cifar10" else f"_{dataset}"
    path = RESULTS_DIR / f"full_results{suffix}.json"
    if not path.exists():
        logger.warning(f"Results file not found: {path}")
        return None
    # JSON is UTF-8 by specification; decode explicitly instead of relying on
    # the platform's locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
77
+
78
+
79
def get_available_datasets():
    """List the dataset keys whose results file exists under ``RESULTS_DIR``.

    Falls back to ``["cifar10"]`` when no results file is present, so the
    caller always receives at least one entry.
    """
    known_files = (
        ("cifar10", "full_results.json"),
        ("imagenet", "full_results_imagenet.json"),
    )
    found = [
        key for key, filename in known_files
        if (RESULTS_DIR / filename).exists()
    ]
    if not found:
        return ["cifar10"]
    return found
86
+
87
+
88
def get_model_choices(results, dataset: str):
    """Return the display labels for every model present in ``results``.

    A model's label is its entry in the dataset's short-name table when one
    exists, otherwise the model identifier itself. Returns an empty list when
    ``results`` is ``None``.
    """
    if results is None:
        return []
    short_for = SHORT_NAMES.get(dataset, {})
    labels = []
    for model_id in results["model_results"]:
        labels.append(short_for.get(model_id, model_id))
    return labels
93
+
94
+
95
def display_name_to_full(display_name: str, results, dataset: str):
    """Map a display label back to the model identifier used in ``results``.

    The dataset's short-name table is searched first; a match only counts if
    the full identifier is actually present in ``results["model_results"]``.
    If no short name matches, ``display_name`` is returned unchanged when it
    is itself a key of the results, and ``None`` otherwise.
    """
    model_results = results["model_results"]
    for full_id, label in SHORT_NAMES.get(dataset, {}).items():
        if label == display_name and full_id in model_results:
            return full_id
    if display_name in model_results:
        return display_name
    return None
101
+
102
+
103
def get_class_names(results):
    """Return the class names described by a results document.

    ``metadata.class_names`` is used when present and non-empty. Otherwise the
    keys of ``per_class_scores`` of the first model entry are used. Returns an
    empty list when ``results`` is ``None`` or contains no models.
    """
    if results is None:
        return []

    declared = results.get("metadata", {}).get("class_names")
    if declared:
        return declared

    per_model = results.get("model_results", {})
    if not per_model:
        return []
    first_entry = next(iter(per_model.values()))
    return list(first_entry.get("per_class_scores", {}).keys())
115
+
116
+
117
+ # ---------------------------------------------------------------------------
118
+ # Analysis
119
+ # ---------------------------------------------------------------------------
120
+
121
def analyze_model(model_display_name, lambda_val, dataset, results):
    """Build the Markdown analysis and the HTML audit report for one model.

    Args:
        model_display_name: Label of the model as shown in the dropdown.
        lambda_val: Fairness penalty weight used for FP-GREAT
            (``aggregate score - lambda_val * RDI``).
        dataset: Dataset key; ``"cifar10"`` selects the CIFAR-10 labels,
            anything else is labelled as ImageNet.
        results: Decoded results document, or ``None`` when unavailable.

    Returns:
        A ``(markdown, html)`` tuple of strings. When ``results`` is ``None``
        or the model cannot be resolved, both strings carry a short message
        instead of a report.
    """
    if results is None:
        return (
            "⚠️ **No results found.** The pre-computed evaluation files are missing.\n\n"
            "Please ensure `outputs/results/full_results.json` (and `full_results_imagenet.json`) "
            "are committed to the Space repository.",
            "<p>No data available.</p>",
        )

    full_name = display_name_to_full(model_display_name, results, dataset)
    if full_name is None:
        return f"Model `{model_display_name}` not found in results.", "<p>Not found.</p>"

    r = results["model_results"][full_name]
    class_names = get_class_names(results)
    num_classes = len(class_names)

    agg = r["aggregate_great_score"]
    rdi = r["rdi"]
    nrgc = r["nrgc"]
    wcr = r["wcr"]
    wcr_class = r.get("wcr_class", "—")
    fp_great = agg - lambda_val * rdi
    fp_at_0 = agg
    fp_at_1 = agg - rdi

    ds_label = "CIFAR-10" if dataset == "cifar10" else "ImageNet"
    threat = "L2 (ε=0.5)" if dataset == "cifar10" else "L∞ (ε=4/255)"

    rdi_icon = "✅ Low" if rdi < 0.1 else ("⚠️ Moderate" if rdi < 0.3 else "❌ High")
    wcr_icon = "✅ Good" if wcr > 0.2 else ("⚠️ Low" if wcr > 0.05 else "❌ Critical")
    fp_interp = (
        "No fairness penalty (= aggregate GREAT Score)" if lambda_val == 0.0 else
        "Mild fairness adjustment" if lambda_val < 0.3 else
        "Balanced robustness-fairness trade-off" if lambda_val < 0.7 else
        "Strong fairness emphasis"
    )

    # ---- Markdown analysis output ----
    header = f"""## 🛡️ {model_display_name}
**Dataset:** {ds_label} &nbsp;|&nbsp; **Threat Model:** {threat} &nbsp;|&nbsp; **Classes:** {num_classes}

---

### Aggregate Metrics

| Metric | Value | Status |
|--------|------:|--------|
| **GREAT Score** (Ω̂) | `{agg:.4f}` | Certified robustness lower bound |
| **RDI** (Disparity) | `{rdi:.4f}` | {rdi_icon} |
| **NRGC** (Gini) | `{nrgc:.4f}` | Class inequality index ∈ [0, 1) |
| **WCR** (Worst-Case)| `{wcr:.4f}` | {wcr_icon} — worst class: `{wcr_class}` |

---

### 🎛️ Fairness-Penalized Score (FP-GREAT)

**FP-GREAT = Ω̄ − λ × RDI = {agg:.4f} − {lambda_val:.2f} × {rdi:.4f} = `{fp_great:.4f}`**

*{fp_interp}*

| λ | FP-GREAT | Meaning |
|---|----------:|---------|
| 0.00 | {fp_at_0:.4f} | Pure robustness (no penalty) |
| **{lambda_val:.2f}** | **{fp_great:.4f}** | ← Current |
| 1.00 | {fp_at_1:.4f} | Max fairness penalty |

---

### Per-Class Robustness Scores
"""
    parts = [header]

    per_class = r.get("per_class_scores", {})
    per_acc = r.get("per_class_accuracy", {})
    max_score = max(per_class.values()) if per_class else 1.0

    if num_classes > 30:
        # Too many classes to list: show only the two extremes by score.
        sorted_cls = sorted(per_class.keys(), key=lambda c: per_class.get(c, 0))
        bottom10, top10 = sorted_cls[:10], sorted_cls[-10:]

        parts.append(f"*{num_classes} total classes — showing bottom 10 and top 10:*\n\n")
        parts.append("**🔴 Bottom 10 — Most Vulnerable:**\n\n")
        parts.append(_class_table_md(bottom10, per_class, per_acc, max_score))
        parts.append("\n**🟢 Top 10 — Most Robust:**\n\n")
        parts.append(_class_table_md(top10, per_class, per_acc, max_score))
    else:
        parts.append(_class_table_md(class_names, per_class, per_acc, max_score))

    vuln = r.get("vulnerability_ranking", [])
    parts.append(_vulnerability_md(vuln, num_classes))
    md = "".join(parts)

    # ---- HTML audit report ----
    html = _build_html_report(
        model_display_name, r, ds_label, threat, num_classes,
        class_names, per_class, per_acc, max_score, vuln,
        agg, rdi, nrgc, wcr, wcr_class, fp_great, lambda_val,
    )

    return md, html


_CLASS_TABLE_HEADER = "| Class | Score | Accuracy | Bar |\n|-------|------:|----------:|-----|\n"


def _class_table_md(classes, per_class, per_acc, max_score):
    """Render a Markdown score/accuracy table (header plus one row per class)."""
    # Floor the denominator so an all-zero score set cannot divide by zero.
    scale = max(max_score, 0.001)
    rows = [_CLASS_TABLE_HEADER]
    for cls in classes:
        s = per_class.get(cls, 0)
        a = per_acc.get(cls, 0)
        bar = "█" * int(s / scale * 15)
        rows.append(f"| `{cls}` | {s:.4f} | {a:.1%} | {bar} |\n")
    return "".join(rows)


def _vulnerability_md(vuln, num_classes):
    """Render the Markdown vulnerability ranking section ('' when no ranking)."""
    if not vuln:
        return ""
    truncated = num_classes > 30
    shown = vuln[:10] if truncated else vuln
    suffix_txt = f" (top 10 of {num_classes})" if truncated else ""
    # Icons are chosen relative to the FULL ranking, not the truncated view;
    # otherwise the last rows of a "10 most vulnerable" list would be marked
    # as robust. This matches the status logic of the HTML report.
    total = len(vuln)
    lines = [f"\n### Vulnerability Ranking{suffix_txt}\n"]
    for rank, (cls, score) in enumerate(shown, 1):
        if rank <= 3:
            icon = "🔴"
        elif rank <= total - 3:
            icon = "🟡"
        else:
            icon = "🟢"
        lines.append(f"{rank}. {icon} **`{cls}`**: {score:.4f}\n")
    return "".join(lines)
242
+
243
+
244
+ def _build_html_report(
245
+ model_name, r, ds_label, threat, num_classes,
246
+ class_names, per_class, per_acc, max_score, vuln,
247
+ agg, rdi, nrgc, wcr, wcr_class, fp_great, lambda_val,
248
+ ):
249
+ rdi_css = "pass" if rdi < 0.1 else ("warn" if rdi < 0.3 else "fail")
250
+ wcr_css = "pass" if wcr > 0.2 else ("warn" if wcr > 0.05 else "fail")
251
+
252
+ # Build per-class table rows
253
+ if num_classes > 30:
254
+ sorted_cls = sorted(per_class.keys(), key=lambda c: per_class.get(c, 0))
255
+ display_cls = sorted_cls[:10] + sorted_cls[-10:]
256
+ else:
257
+ display_cls = class_names
258
+
259
+ class_rows = ""
260
+ for cls in display_cls:
261
+ s = per_class.get(cls, 0)
262
+ a = per_acc.get(cls, 0)
263
+ w = int(s / max(max_score, 0.001) * 200)
264
+ class_rows += (
265
+ f"<tr><td>{cls}</td><td>{s:.4f}</td><td>{a:.1%}</td>"
266
+ f'<td><div class="bar" style="width:{w}px"></div></td></tr>\n'
267
+ )
268
+
269
+ vuln_rows = ""
270
+ total_v = len(vuln)
271
+ for rank, (cls, score) in enumerate((vuln[:10] if num_classes > 30 else vuln), 1):
272
+ if rank <= 3:
273
+ status = '<span class="fail">⚠ Vulnerable</span>'
274
+ elif rank >= total_v - 2:
275
+ status = '<span class="pass">✓ Robust</span>'
276
+ else:
277
+ status = '<span class="warn">— Average</span>'
278
+ vuln_rows += f"<tr><td>{rank}</td><td>{cls}</td><td>{score:.4f}</td><td>{status}</td></tr>\n"
279
+
280
+ if rdi >= 0.3:
281
+ assessment = (
282
+ f"<strong class='fail'>High disparity (RDI={rdi:.3f}).</strong> "
283
+ f"Class <em>{wcr_class}</em> is significantly more vulnerable. "
284
+ )
285
+ elif rdi >= 0.1:
286
+ assessment = f"<strong class='warn'>Moderate disparity (RDI={rdi:.3f}).</strong> Some classes are noticeably more vulnerable."
287
+ else:
288
+ assessment = f"<strong class='pass'>Low disparity (RDI={rdi:.3f}).</strong> Robustness is distributed relatively evenly across classes."
289
+
290
+ if wcr < 0.05:
291
+ assessment += f" <strong class='fail'>Critical:</strong> Worst-case class ({wcr_class}) has near-zero robustness (WCR={wcr:.4f})."
292
+ elif wcr < 0.2:
293
+ assessment += f" Worst-case class ({wcr_class}) has limited robustness (WCR={wcr:.4f})."
294
+
295
+ return f"""<!DOCTYPE html>
296
+ <html lang="en">
297
+ <head>
298
+ <meta charset="UTF-8">
299
+ <title>GF-Score Audit — {model_name}</title>
300
+ <style>
301
+ body{{font-family:'Segoe UI',sans-serif;margin:0;background:#f5f7fa;color:#333}}
302
+ .wrap{{max-width:860px;margin:24px auto;background:#fff;padding:36px;border-radius:10px;box-shadow:0 2px 12px rgba(0,0,0,.1)}}
303
+ h1{{color:#2c3e50;border-bottom:3px solid #3498db;padding-bottom:8px;font-size:1.4em}}
304
+ h2{{color:#34495e;margin-top:28px;font-size:1.1em}}
305
+ .cards{{display:flex;flex-wrap:wrap;gap:12px;margin:12px 0}}
306
+ .card{{background:#ecf0f1;padding:14px 18px;border-radius:8px;text-align:center;min-width:130px}}
307
+ .card .val{{font-size:1.7em;font-weight:700;color:#2c3e50}}
308
+ .card .lbl{{font-size:.7em;color:#7f8c8d;text-transform:uppercase;margin-top:2px}}
309
+ .pass{{color:#27ae60}}.warn{{color:#e67e22}}.fail{{color:#e74c3c}}
310
+ table{{border-collapse:collapse;width:100%;margin:12px 0;font-size:.9em}}
311
+ th{{background:#3498db;color:#fff;padding:9px 14px;text-align:left}}
312
+ td{{padding:7px 14px;border-bottom:1px solid #eee}}
313
+ tr:nth-child(even){{background:#f9f9f9}}
314
+ .bar{{height:14px;background:linear-gradient(90deg,#3498db,#2ecc71);border-radius:3px;display:inline-block}}
315
+ .footer{{margin-top:24px;padding-top:12px;border-top:1px solid #eee;font-size:.75em;color:#aaa}}
316
+ </style>
317
+ </head>
318
+ <body>
319
+ <div class="wrap">
320
+ <h1>🛡️ GF-Score Robustness Audit Report</h1>
321
+ <p><strong>Model:</strong> {model_name}<br>
322
+ <strong>Dataset:</strong> {ds_label} &nbsp;|&nbsp; <strong>Threat Model:</strong> {threat} &nbsp;|&nbsp; <strong>Classes:</strong> {num_classes}<br>
323
+ <strong>Generated:</strong> {datetime.now().strftime('%Y-%m-%d %H:%M UTC')}</p>
324
+
325
+ <h2>Summary Metrics</h2>
326
+ <div class="cards">
327
+ <div class="card"><div class="val">{agg:.4f}</div><div class="lbl">GREAT Score</div></div>
328
+ <div class="card"><div class="val {rdi_css}">{rdi:.4f}</div><div class="lbl">RDI</div></div>
329
+ <div class="card"><div class="val">{nrgc:.4f}</div><div class="lbl">NRGC (Gini)</div></div>
330
+ <div class="card"><div class="val {wcr_css}">{wcr:.4f}</div><div class="lbl">WCR ({wcr_class})</div></div>
331
+ <div class="card"><div class="val">{fp_great:.4f}</div><div class="lbl">FP-GREAT (λ={lambda_val})</div></div>
332
+ </div>
333
+
334
+ <h2>Per-Class Robustness Profile</h2>
335
+ <table>
336
+ <tr><th>Class</th><th>GREAT Score</th><th>Clean Acc.</th><th>Visual</th></tr>
337
+ {class_rows}
338
+ </table>
339
+
340
+ <h2>Vulnerability Ranking</h2>
341
+ <table>
342
+ <tr><th>Rank</th><th>Class</th><th>Score</th><th>Status</th></tr>
343
+ {vuln_rows}
344
+ </table>
345
+
346
+ <h2>Assessment</h2>
347
+ <p>{assessment}</p>
348
+
349
+ <div class="footer">
350
+ GF-Score v0.1.0 · Based on GREAT Score (Li et al., NeurIPS 2024) extended with per-class fairness metrics ·
351
+ Metrics: RDI (Max Group Disparity), NRGC (Gini), WCR (Rawlsian Maximin), FP-GREAT (IHDI Adaptation)
352
+ </div>
353
+ </div>
354
+ </body>
355
+ </html>"""
356
+
357
+
358
+ # ---------------------------------------------------------------------------
359
+ # Gradio App
360
+ # ---------------------------------------------------------------------------
361
+
362
def build_app():
    """Assemble the Gradio Blocks dashboard and return it (not launched).

    Results for every available dataset are loaded once up front and kept in
    a dict shared by the callbacks; the selected dataset key and its results
    are additionally tracked in Gradio state.
    """
    available = get_available_datasets()
    default_ds = available[0]

    results_cache = {}
    for ds in available:
        results_cache[ds] = load_results(ds)

    dataset_labels = {
        "cifar10": "CIFAR-10 (10 classes · L2 threat model · 17 models)",
        "imagenet": "ImageNet (1000 classes · L∞ threat model · 5 models)",
    }

    initial_results = results_cache.get(default_ds)
    initial_models = get_model_choices(initial_results, default_ds)
    initial_model = initial_models[0] if initial_models else None

    with gr.Blocks(
        title="GF-Score Auditing Dashboard",
        theme=gr.themes.Soft(),
        css=".gr-markdown table { width: 100%; }",
    ) as demo:

        current_ds = gr.State(default_ds)
        current_results = gr.State(initial_results)

        gr.Markdown("""
# 🛡️ GF-Score: Fairness-Aware Robustness Auditing Dashboard

Inspect **class-conditional adversarial robustness** of certified models with four fairness metrics
grounded in welfare economics. Based on [GREAT Score (NeurIPS 2024)](https://arxiv.org/abs/2304.09875),
extended with per-class decomposition, disparity analysis, and **attack-free** self-calibration.

| Metric | Meaning |
|--------|---------|
| **RDI** | Range of per-class robustness (Max Group Disparity) |
| **NRGC** | Normalized Gini Coefficient — overall inequality |
| **WCR** | Worst-case class robustness (Rawlsian maximin) |
| **FP-GREAT** | Fairness-penalized aggregate score: Ω̄ − λ·RDI |
""")

        with gr.Row():
            dataset_dd = gr.Dropdown(
                choices=[(dataset_labels[ds], ds) for ds in available],
                value=default_ds,
                label="Dataset",
                scale=2,
            )
            model_dd = gr.Dropdown(
                choices=initial_models,
                value=initial_model,
                label="Model",
                scale=2,
            )

        with gr.Row():
            lambda_sl = gr.Slider(
                minimum=0.0, maximum=1.0, value=0.5, step=0.05,
                label="Fairness Penalty λ (FP-GREAT = GREAT Score − λ × RDI)",
                scale=3,
            )
            analyze_btn = gr.Button("🔍 Analyze", variant="primary", scale=1)

        with gr.Tabs():
            with gr.TabItem("📊 Analysis"):
                analysis_md = gr.Markdown()
            with gr.TabItem("📄 Full HTML Report"):
                report_html = gr.HTML()

        # ---- callbacks ----

        def on_dataset_change(ds_choice):
            # Reuse cached results; retry the load if the cached value is empty.
            res = results_cache.get(ds_choice) or load_results(ds_choice)
            results_cache[ds_choice] = res
            choices = get_model_choices(res, ds_choice)
            default_model = choices[0] if choices else None
            return (
                gr.update(choices=choices, value=default_model),
                ds_choice,
                res,
            )

        def run(model_name, lam, ds, res):
            md, html = analyze_model(model_name, lam, ds, res)
            return md, html

        dataset_dd.change(
            fn=on_dataset_change,
            inputs=[dataset_dd],
            outputs=[model_dd, current_ds, current_results],
        )

        # The button click and the slider release trigger the same analysis.
        analyze_btn.click(
            fn=run,
            inputs=[model_dd, lambda_sl, current_ds, current_results],
            outputs=[analysis_md, report_html],
        )
        lambda_sl.release(
            fn=run,
            inputs=[model_dd, lambda_sl, current_ds, current_results],
            outputs=[analysis_md, report_html],
        )

        gr.Markdown("""---
*GF-Score v0.1.0 · [Paper (NeurIPS 2026, under review)]() · [GitHub](https://github.com/aryashah00/GF-Score)*""")

    return demo
462
+
463
+
464
# Built at import time so the app object exists as the module-level name `demo`.
demo = build_app()

# Start the server only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()