Spaces:

richardyoung
/

abliteration-methods-dashboard

Sleeping

App Files Community

Ric committed 20 days ago

Commit

c7bcf11

1 Parent(s): 62958a5

fix: cap method chart at 100%, redesign efficiency scatter with smaller dots and quadrant shading

Browse files

Files changed (1) hide show

app.py +45 -24

app.py CHANGED Viewed

@@ -479,7 +479,7 @@ def build_method_comparison(base_model: str) -> go.Figure:
         title=f"All Methods Tested on {base_model}",
         xaxis_title="Tool / Method",
         yaxis_title="ASR (%)",
-        yaxis_range=[0, 110],
         plot_bgcolor="#0e1117",
         paper_bgcolor="#0e1117",
         font_color="#c4c4c4",
@@ -510,40 +510,61 @@ def build_efficiency_scatter() -> go.Figure:
     df = pd.DataFrame(all_results)
-    fig = px.scatter(
-        df,
-        x="KL",
-        y="ASR (%)",
-        text="Label",
-        color="Source",
-        color_discrete_map={"Our Tools": "#e94560", "Community": "#95d5b2"},
-        size="ASR (%)",
-        size_max=20,
-    )
-    fig.update_traces(textposition="top center", textfont_size=8)
-    # Add quadrant annotations
     fig.add_shape(type="line", x0=1.0, x1=1.0, y0=0, y1=100,
-                  line=dict(color="#444", width=1, dash="dash"))
     fig.add_shape(type="line", x0=0, x1=12, y0=50, y1=50,
-                  line=dict(color="#444", width=1, dash="dash"))
-    fig.add_annotation(x=0.3, y=95, text="IDEAL: High ASR, Low KL",
-                       showarrow=False, font=dict(color="#95d5b2", size=10))
-    fig.add_annotation(x=8, y=95, text="Effective but Damaged",
-                       showarrow=False, font=dict(color="#ffd60a", size=10))
-    fig.add_annotation(x=0.3, y=10, text="Failed Abliteration",
-                       showarrow=False, font=dict(color="#e94560", size=10))
     fig.update_layout(
-        title="Abliteration Efficiency: ASR vs KL Divergence (all sources)",
-        xaxis_title="KL Divergence (lower = less damage)",
         yaxis_title="ASR % (higher = more refusals removed)",
-        yaxis_range=[0, 105],
         plot_bgcolor="#0e1117",
         paper_bgcolor="#0e1117",
         font_color="#c4c4c4",
         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
         margin=dict(t=70, b=40),
     )
     return fig

         title=f"All Methods Tested on {base_model}",
         xaxis_title="Tool / Method",
         yaxis_title="ASR (%)",
+        yaxis_range=[0, 100],
         plot_bgcolor="#0e1117",
         paper_bgcolor="#0e1117",
         font_color="#c4c4c4",
     df = pd.DataFrame(all_results)
+    fig = go.Figure()
+    for source, color, symbol in [("Our Tools", "#e94560", "circle"), ("Community", "#95d5b2", "diamond")]:
+        subset = df[df["Source"] == source]
+        if subset.empty:
+            continue
+        fig.add_trace(go.Scatter(
+            x=subset["KL"],
+            y=subset["ASR (%)"],
+            mode="markers",
+            name=source,
+            marker=dict(
+                color=color,
+                size=10,
+                symbol=symbol,
+                line=dict(width=1, color="#222"),
+            ),
+            text=subset["Label"],
+            hovertemplate="<b>%{text}</b><br>ASR: %{y:.0f}%<br>KL: %{x:.4f}<extra></extra>",
+        ))
+    # Quadrant shading
+    fig.add_shape(type="rect", x0=0, x1=1.0, y0=50, y1=100,
+                  fillcolor="rgba(149,213,178,0.06)", line=dict(width=0))
+    fig.add_shape(type="rect", x0=1.0, x1=12, y0=50, y1=100,
+                  fillcolor="rgba(255,214,10,0.04)", line=dict(width=0))
+    fig.add_shape(type="rect", x0=0, x1=12, y0=0, y1=50,
+                  fillcolor="rgba(233,69,96,0.04)", line=dict(width=0))
+    # Quadrant lines
     fig.add_shape(type="line", x0=1.0, x1=1.0, y0=0, y1=100,
+                  line=dict(color="#333", width=1, dash="dot"))
     fig.add_shape(type="line", x0=0, x1=12, y0=50, y1=50,
+                  line=dict(color="#333", width=1, dash="dot"))
+    # Quadrant labels
+    fig.add_annotation(x=0.3, y=97, text="Best: High ASR, Low KL",
+                       showarrow=False, font=dict(color="#95d5b2", size=9))
+    fig.add_annotation(x=6, y=97, text="Effective but Damaged",
+                       showarrow=False, font=dict(color="#ffd60a", size=9))
+    fig.add_annotation(x=6, y=5, text="Failed",
+                       showarrow=False, font=dict(color="#666", size=9))
     fig.update_layout(
+        title="Abliteration Efficiency: ASR vs KL Divergence",
+        xaxis_title="KL Divergence (lower = less damage to model)",
         yaxis_title="ASR % (higher = more refusals removed)",
+        yaxis_range=[0, 102],
+        xaxis_range=[-0.2, max(df["KL"].max() * 1.1, 2)],
         plot_bgcolor="#0e1117",
         paper_bgcolor="#0e1117",
         font_color="#c4c4c4",
         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
         margin=dict(t=70, b=40),
+        hoverlabel=dict(bgcolor="#1a1a2e", font_size=12),
     )
     return fig