Spaces:

wisent-ai
/

UncensorBench-Leaderboard

Paused

App Files Files Community

lbartoszcze committed on Dec 1, 2025

Commit

a55ede4

verified ·

1 Parent(s): f24cb80

Dynamic methods - accept any method from user submissions

Browse files

Files changed (1) hide show

app.py +56 -20

app.py CHANGED Viewed

@@ -15,9 +15,7 @@ server = app.server
 # Load leaderboard data
 DATA_FILE = "leaderboard.csv"
-# Valid methods for censorship removal
-VALID_METHODS = ["none", "abliteration", "steering", "finetuning", "prompting", "other"]
 METHOD_DESCRIPTIONS = {
     "none": "Baseline (no modification)",
     "abliteration": "Abliteration technique",
@@ -27,6 +25,7 @@ METHOD_DESCRIPTIONS = {
     "other": "Other methods",
 }
 METHOD_COLORS = {
     "none": "#9E9E9E",
     "abliteration": "#E91E63",
@@ -36,6 +35,9 @@ METHOD_COLORS = {
     "other": "#9C27B0",
 }
 def load_data():
     """Load leaderboard data from CSV."""
@@ -55,18 +57,39 @@ def load_data():
         ])
 def calculate_method_stats(df):
     """Calculate statistics for each method including delta from baseline."""
     if len(df) == 0:
-        return pd.DataFrame()
     # Get baseline average (method = "none")
     baseline_df = df[df["method"] == "none"]
     baseline_avg = baseline_df["uncensored_rate"].mean() if len(baseline_df) > 0 else 0
-    # Group by method
     method_stats = []
-    for method in VALID_METHODS:
         method_df = df[df["method"] == method]
         if len(method_df) > 0:
             avg_rate = method_df["uncensored_rate"].mean()
@@ -78,9 +101,12 @@ def calculate_method_stats(df):
             # Find best model for this method
             best_model = method_df.loc[method_df["uncensored_rate"].idxmax(), "model"]
             method_stats.append({
                 "method": method,
-                "description": METHOD_DESCRIPTIONS.get(method, method),
                 "num_models": len(method_df),
                 "avg_uncensored_rate": avg_rate,
                 "max_uncensored_rate": max_rate,
@@ -90,7 +116,7 @@ def calculate_method_stats(df):
                 "best_model": best_model,
             })
-    return pd.DataFrame(method_stats)
 # Column definitions for Models AG Grid
@@ -447,7 +473,7 @@ def render_tab_content(tab, n):
     elif tab == "methods":
         # Methods comparison view
-        method_df = calculate_method_stats(df)
         row_data = method_df.to_dict("records") if len(method_df) > 0 else []
         # Sort by delta from baseline descending
@@ -455,6 +481,22 @@ def render_tab_content(tab, n):
             method_df = method_df.sort_values("delta_from_baseline", ascending=False)
             row_data = method_df.to_dict("records")
         return html.Div([
             # Method comparison description
             html.Div([
@@ -482,19 +524,13 @@ def render_tab_content(tab, n):
                 className="ag-theme-alpine",
             ),
-            # Method legend
             html.Div([
                 html.H4("Method Definitions", style={"marginTop": "30px", "marginBottom": "15px"}),
-                html.Div([
-                    html.Div([
-                        html.Span(
-                            f"● {method}",
-                            style={"color": METHOD_COLORS.get(method, "#666"), "fontWeight": "bold", "marginRight": "10px"}
-                        ),
-                        html.Span(desc, style={"color": "#666"}),
-                    ], style={"marginBottom": "8px"})
-                    for method, desc in METHOD_DESCRIPTIONS.items()
-                ], style={"columns": "2", "columnGap": "40px"}),
             ], style={
                 "backgroundColor": "#f9f9f9",
                 "padding": "20px",

 # Load leaderboard data
 DATA_FILE = "leaderboard.csv"
+# Known method descriptions (for display purposes, but we accept any method)
 METHOD_DESCRIPTIONS = {
     "none": "Baseline (no modification)",
     "abliteration": "Abliteration technique",
     "other": "Other methods",
 }
+# Colors for known methods, dynamic methods get auto-assigned colors
 METHOD_COLORS = {
     "none": "#9E9E9E",
     "abliteration": "#E91E63",
     "other": "#9C27B0",
 }
+# Fallback colors for dynamically discovered methods
+DYNAMIC_COLORS = ["#00BCD4", "#795548", "#607D8B", "#3F51B5", "#009688", "#CDDC39", "#FF5722", "#673AB7"]
 def load_data():
     """Load leaderboard data from CSV."""
         ])
+def get_method_color(method, method_index=0):
+    """Get color for a method, using predefined or dynamic colors."""
+    if method in METHOD_COLORS:
+        return METHOD_COLORS[method]
+    # Assign a dynamic color based on index
+    return DYNAMIC_COLORS[method_index % len(DYNAMIC_COLORS)]
 def calculate_method_stats(df):
     """Calculate statistics for each method including delta from baseline."""
     if len(df) == 0:
+        return pd.DataFrame(), {}
+    # Get all unique methods from the actual data
+    all_methods = df["method"].dropna().unique().tolist()
     # Get baseline average (method = "none")
     baseline_df = df[df["method"] == "none"]
     baseline_avg = baseline_df["uncensored_rate"].mean() if len(baseline_df) > 0 else 0
+    # Build dynamic color mapping for any new methods
+    dynamic_method_colors = {}
+    dynamic_idx = 0
+    for method in all_methods:
+        if method in METHOD_COLORS:
+            dynamic_method_colors[method] = METHOD_COLORS[method]
+        else:
+            dynamic_method_colors[method] = DYNAMIC_COLORS[dynamic_idx % len(DYNAMIC_COLORS)]
+            dynamic_idx += 1
+    # Group by method - iterate over actual methods in the data
     method_stats = []
+    for method in all_methods:
         method_df = df[df["method"] == method]
         if len(method_df) > 0:
             avg_rate = method_df["uncensored_rate"].mean()
             # Find best model for this method
             best_model = method_df.loc[method_df["uncensored_rate"].idxmax(), "model"]
+            # Get description - use predefined or just capitalize the method name
+            description = METHOD_DESCRIPTIONS.get(method, method.replace("_", " ").title())
             method_stats.append({
                 "method": method,
+                "description": description,
                 "num_models": len(method_df),
                 "avg_uncensored_rate": avg_rate,
                 "max_uncensored_rate": max_rate,
                 "best_model": best_model,
             })
+    return pd.DataFrame(method_stats), dynamic_method_colors
 # Column definitions for Models AG Grid
     elif tab == "methods":
         # Methods comparison view
+        method_df, method_colors = calculate_method_stats(df)
         row_data = method_df.to_dict("records") if len(method_df) > 0 else []
         # Sort by delta from baseline descending
             method_df = method_df.sort_values("delta_from_baseline", ascending=False)
             row_data = method_df.to_dict("records")
+        # Build method legend from actual data
+        method_legend_items = []
+        for _, row in method_df.iterrows():
+            method = row["method"]
+            desc = row["description"]
+            color = method_colors.get(method, "#666")
+            method_legend_items.append(
+                html.Div([
+                    html.Span(
+                        f"● {method}",
+                        style={"color": color, "fontWeight": "bold", "marginRight": "10px"}
+                    ),
+                    html.Span(desc, style={"color": "#666"}),
+                ], style={"marginBottom": "8px"})
+            )
         return html.Div([
             # Method comparison description
             html.Div([
                 className="ag-theme-alpine",
             ),
+            # Method legend - dynamically built from actual data
             html.Div([
                 html.H4("Method Definitions", style={"marginTop": "30px", "marginBottom": "15px"}),
+                html.Div(
+                    method_legend_items if method_legend_items else [html.P("No methods submitted yet.", style={"color": "#666"})],
+                    style={"columns": "2", "columnGap": "40px"} if len(method_legend_items) > 3 else {}
+                ),
             ], style={
                 "backgroundColor": "#f9f9f9",
                 "padding": "20px",