Ric committed on
Commit
c7bcf11
·
1 Parent(s): 62958a5

fix: cap method chart at 100%, redesign efficiency scatter with smaller dots and quadrant shading

Browse files
Files changed (1) hide show
  1. app.py +45 -24
app.py CHANGED
@@ -479,7 +479,7 @@ def build_method_comparison(base_model: str) -> go.Figure:
479
  title=f"All Methods Tested on {base_model}",
480
  xaxis_title="Tool / Method",
481
  yaxis_title="ASR (%)",
482
- yaxis_range=[0, 110],
483
  plot_bgcolor="#0e1117",
484
  paper_bgcolor="#0e1117",
485
  font_color="#c4c4c4",
@@ -510,40 +510,61 @@ def build_efficiency_scatter() -> go.Figure:
510
 
511
  df = pd.DataFrame(all_results)
512
 
513
- fig = px.scatter(
514
- df,
515
- x="KL",
516
- y="ASR (%)",
517
- text="Label",
518
- color="Source",
519
- color_discrete_map={"Our Tools": "#e94560", "Community": "#95d5b2"},
520
- size="ASR (%)",
521
- size_max=20,
522
- )
523
- fig.update_traces(textposition="top center", textfont_size=8)
 
 
 
 
 
 
 
 
 
524
 
525
- # Add quadrant annotations
 
 
 
 
 
 
 
 
526
  fig.add_shape(type="line", x0=1.0, x1=1.0, y0=0, y1=100,
527
- line=dict(color="#444", width=1, dash="dash"))
528
  fig.add_shape(type="line", x0=0, x1=12, y0=50, y1=50,
529
- line=dict(color="#444", width=1, dash="dash"))
530
- fig.add_annotation(x=0.3, y=95, text="IDEAL: High ASR, Low KL",
531
- showarrow=False, font=dict(color="#95d5b2", size=10))
532
- fig.add_annotation(x=8, y=95, text="Effective but Damaged",
533
- showarrow=False, font=dict(color="#ffd60a", size=10))
534
- fig.add_annotation(x=0.3, y=10, text="Failed Abliteration",
535
- showarrow=False, font=dict(color="#e94560", size=10))
 
 
536
 
537
  fig.update_layout(
538
- title="Abliteration Efficiency: ASR vs KL Divergence (all sources)",
539
- xaxis_title="KL Divergence (lower = less damage)",
540
  yaxis_title="ASR % (higher = more refusals removed)",
541
- yaxis_range=[0, 105],
 
542
  plot_bgcolor="#0e1117",
543
  paper_bgcolor="#0e1117",
544
  font_color="#c4c4c4",
545
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
546
  margin=dict(t=70, b=40),
 
547
  )
548
  return fig
549
 
 
479
  title=f"All Methods Tested on {base_model}",
480
  xaxis_title="Tool / Method",
481
  yaxis_title="ASR (%)",
482
+ yaxis_range=[0, 100],
483
  plot_bgcolor="#0e1117",
484
  paper_bgcolor="#0e1117",
485
  font_color="#c4c4c4",
 
510
 
511
  df = pd.DataFrame(all_results)
512
 
513
+ fig = go.Figure()
514
+
515
+ for source, color, symbol in [("Our Tools", "#e94560", "circle"), ("Community", "#95d5b2", "diamond")]:
516
+ subset = df[df["Source"] == source]
517
+ if subset.empty:
518
+ continue
519
+ fig.add_trace(go.Scatter(
520
+ x=subset["KL"],
521
+ y=subset["ASR (%)"],
522
+ mode="markers",
523
+ name=source,
524
+ marker=dict(
525
+ color=color,
526
+ size=10,
527
+ symbol=symbol,
528
+ line=dict(width=1, color="#222"),
529
+ ),
530
+ text=subset["Label"],
531
+ hovertemplate="<b>%{text}</b><br>ASR: %{y:.0f}%<br>KL: %{x:.4f}<extra></extra>",
532
+ ))
533
 
534
+ # Quadrant shading
535
+ fig.add_shape(type="rect", x0=0, x1=1.0, y0=50, y1=100,
536
+ fillcolor="rgba(149,213,178,0.06)", line=dict(width=0))
537
+ fig.add_shape(type="rect", x0=1.0, x1=12, y0=50, y1=100,
538
+ fillcolor="rgba(255,214,10,0.04)", line=dict(width=0))
539
+ fig.add_shape(type="rect", x0=0, x1=12, y0=0, y1=50,
540
+ fillcolor="rgba(233,69,96,0.04)", line=dict(width=0))
541
+
542
+ # Quadrant lines
543
  fig.add_shape(type="line", x0=1.0, x1=1.0, y0=0, y1=100,
544
+ line=dict(color="#333", width=1, dash="dot"))
545
  fig.add_shape(type="line", x0=0, x1=12, y0=50, y1=50,
546
+ line=dict(color="#333", width=1, dash="dot"))
547
+
548
+ # Quadrant labels
549
+ fig.add_annotation(x=0.3, y=97, text="Best: High ASR, Low KL",
550
+ showarrow=False, font=dict(color="#95d5b2", size=9))
551
+ fig.add_annotation(x=6, y=97, text="Effective but Damaged",
552
+ showarrow=False, font=dict(color="#ffd60a", size=9))
553
+ fig.add_annotation(x=6, y=5, text="Failed",
554
+ showarrow=False, font=dict(color="#666", size=9))
555
 
556
  fig.update_layout(
557
+ title="Abliteration Efficiency: ASR vs KL Divergence",
558
+ xaxis_title="KL Divergence (lower = less damage to model)",
559
  yaxis_title="ASR % (higher = more refusals removed)",
560
+ yaxis_range=[0, 102],
561
+ xaxis_range=[-0.2, max(df["KL"].max() * 1.1, 2)],
562
  plot_bgcolor="#0e1117",
563
  paper_bgcolor="#0e1117",
564
  font_color="#c4c4c4",
565
  legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
566
  margin=dict(t=70, b=40),
567
+ hoverlabel=dict(bgcolor="#1a1a2e", font_size=12),
568
  )
569
  return fig
570