Ric committed on
Commit ·
f35fdee
1
Parent(s): d55e7bb
refactor: extract hardcoded data to JSON files
Browse files
Move all dashboard data (leaderboard, capabilities, compatibility,
mean capability change) from hardcoded Python dicts to data/*.json.
Makes it easier to add new model results without editing app.py.
- app.py +18 -94
- data/capabilities.json +24 -0
- data/compatibility.json +26 -0
- data/leaderboard.json +10 -0
- data/mean_capability_change.json +5 -0
app.py
CHANGED
|
@@ -4,6 +4,9 @@ Visualizes results from "Comparative Analysis of LLM Abliteration Methods:
|
|
| 4 |
A Cross-Architecture Evaluation" (arxiv:2512.13655) by Richard J. Young.
|
| 5 |
"""
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
import gradio as gr
|
| 8 |
import pandas as pd
|
| 9 |
import plotly.express as px
|
|
@@ -68,103 +71,24 @@ CSS = """
|
|
| 68 |
"""
|
| 69 |
|
| 70 |
# ---------------------------------------------------------------------------
|
| 71 |
-
#
|
| 72 |
# ---------------------------------------------------------------------------
|
| 73 |
|
| 74 |
-
|
| 75 |
-
{"Model": "Zephyr-7B-beta", "Parameters": "7B", "Refusals (n=100)": 2,
|
| 76 |
-
"KL Divergence": 0.076, "ASR (%)": 98, "ASR 95% CI": "93.0\u201399.4", "Time": "40m"},
|
| 77 |
-
{"Model": "DeepSeek-7B-chat", "Parameters": "7B", "Refusals (n=100)": 16,
|
| 78 |
-
"KL Divergence": 0.043, "ASR (%)": 84, "ASR 95% CI": "75.6\u201389.9", "Time": "59m"},
|
| 79 |
-
{"Model": "Mistral-7B-v0.3", "Parameters": "7B", "Refusals (n=100)": 16,
|
| 80 |
-
"KL Divergence": 0.317, "ASR (%)": 84, "ASR 95% CI": "75.6\u201389.9", "Time": "39m"},
|
| 81 |
-
{"Model": "Llama-3.1-8B", "Parameters": "8B", "Refusals (n=100)": 24,
|
| 82 |
-
"KL Divergence": 0.056, "ASR (%)": 76, "ASR 95% CI": "66.8\u201383.3", "Time": "33m"},
|
| 83 |
-
{"Model": "Qwen3-8B", "Parameters": "8B", "Refusals (n=100)": 25,
|
| 84 |
-
"KL Divergence": 0.210, "ASR (%)": 75, "ASR 95% CI": "65.7\u201382.5", "Time": "56m"},
|
| 85 |
-
{"Model": "Yi-1.5-9B", "Parameters": "9B", "Refusals (n=100)": 25,
|
| 86 |
-
"KL Divergence": 0.248, "ASR (%)": 75, "ASR 95% CI": "65.7\u201382.5", "Time": "57m"},
|
| 87 |
-
{"Model": "Qwen2.5-7B", "Parameters": "7B", "Refusals (n=100)": 42,
|
| 88 |
-
"KL Divergence": 1.646, "ASR (%)": 58, "ASR 95% CI": "48.2\u201367.2", "Time": "41m"},
|
| 89 |
-
{"Model": "StableLM-2-12B", "Parameters": "12B", "Refusals (n=100)": 54,
|
| 90 |
-
"KL Divergence": 1.605, "ASR (%)": 46, "ASR 95% CI": "36.6\u201355.7", "Time": "109m"},
|
| 91 |
-
]
|
| 92 |
-
|
| 93 |
-
CAPABILITY_DATA = {
|
| 94 |
-
"DeepSeek-7B": [
|
| 95 |
-
{"Variant": "Base", "MMLU": 49.44, "GSM8K": 44.58, "HellaSwag": 77.84},
|
| 96 |
-
{"Variant": "Heretic", "MMLU": 48.95, "GSM8K": 40.11, "HellaSwag": 77.62},
|
| 97 |
-
{"Variant": "DECCP", "MMLU": 49.05, "GSM8K": 43.59, "HellaSwag": 77.99},
|
| 98 |
-
{"Variant": "ErisForge", "MMLU": 49.43, "GSM8K": 44.35, "HellaSwag": 77.69},
|
| 99 |
-
],
|
| 100 |
-
"Mistral-7B": [
|
| 101 |
-
{"Variant": "Base", "MMLU": 59.74, "GSM8K": 48.52, "HellaSwag": 83.28},
|
| 102 |
-
{"Variant": "Heretic", "MMLU": 59.46, "GSM8K": 48.37, "HellaSwag": 83.36},
|
| 103 |
-
{"Variant": "DECCP", "MMLU": 58.98, "GSM8K": 47.61, "HellaSwag": 83.12},
|
| 104 |
-
{"Variant": "ErisForge", "MMLU": 59.42, "GSM8K": 48.29, "HellaSwag": 83.35},
|
| 105 |
-
],
|
| 106 |
-
"Yi-1.5-9B": [
|
| 107 |
-
{"Variant": "Base", "MMLU": 68.02, "GSM8K": 70.89, "HellaSwag": 78.62},
|
| 108 |
-
{"Variant": "Heretic", "MMLU": 66.46, "GSM8K": 52.08, "HellaSwag": 77.08},
|
| 109 |
-
{"Variant": "DECCP", "MMLU": 67.33, "GSM8K": 72.40, "HellaSwag": 77.87},
|
| 110 |
-
{"Variant": "ErisForge", "MMLU": 67.99, "GSM8K": 70.51, "HellaSwag": 78.46},
|
| 111 |
-
],
|
| 112 |
-
"Zephyr-7B": [
|
| 113 |
-
{"Variant": "Heretic", "MMLU": 58.50, "GSM8K": 33.36, "HellaSwag": 82.90},
|
| 114 |
-
{"Variant": "DECCP", "MMLU": 58.28, "GSM8K": 33.21, "HellaSwag": 82.05},
|
| 115 |
-
],
|
| 116 |
-
}
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
"FailSpy": "Yes", "ErisForge": "Yes"},
|
| 132 |
-
{"Model": "Qwen2.5-7B", "Heretic": "Yes", "DECCP": "Yes",
|
| 133 |
-
"FailSpy": "Yes", "ErisForge": "Yes"},
|
| 134 |
-
{"Model": "Gemma-2-9B", "Heretic": "Yes", "DECCP": "Yes",
|
| 135 |
-
"FailSpy": "Yes", "ErisForge": "Yes"},
|
| 136 |
-
{"Model": "Gemma-7B", "Heretic": "Yes", "DECCP": "Yes",
|
| 137 |
-
"FailSpy": "Yes", "ErisForge": "Yes"},
|
| 138 |
-
{"Model": "StableLM-2-12B", "Heretic": "Yes", "DECCP": "Yes",
|
| 139 |
-
"FailSpy": "Partial", "ErisForge": "Yes"},
|
| 140 |
-
{"Model": "Yi-1.5-9B", "Heretic": "Yes", "DECCP": "Yes",
|
| 141 |
-
"FailSpy": "Partial", "ErisForge": "Yes"},
|
| 142 |
-
{"Model": "Zephyr-7B-beta", "Heretic": "Yes", "DECCP": "Yes",
|
| 143 |
-
"FailSpy": "Partial", "ErisForge": "Yes"},
|
| 144 |
-
{"Model": "DeepSeek-7B", "Heretic": "Yes", "DECCP": "Yes",
|
| 145 |
-
"FailSpy": "Partial", "ErisForge": "Yes"},
|
| 146 |
-
{"Model": "OpenChat-3.5", "Heretic": "Yes", "DECCP": "Yes",
|
| 147 |
-
"FailSpy": "Partial", "ErisForge": "No"},
|
| 148 |
-
{"Model": "Qwen3-8B", "Heretic": "Yes", "DECCP": "Yes",
|
| 149 |
-
"FailSpy": "Partial", "ErisForge": "N/A"},
|
| 150 |
-
{"Model": "Vicuna-7B", "Heretic": "Yes", "DECCP": "N/A",
|
| 151 |
-
"FailSpy": "Partial", "ErisForge": "No"},
|
| 152 |
-
{"Model": "InternLM2.5-7B", "Heretic": "Yes", "DECCP": "N/A",
|
| 153 |
-
"FailSpy": "Partial", "ErisForge": "No"},
|
| 154 |
-
{"Model": "Falcon-Mamba-7B", "Heretic": "Yes", "DECCP": "Incompatible",
|
| 155 |
-
"FailSpy": "Incompatible", "ErisForge": "Incompatible"},
|
| 156 |
-
{"Model": "Phi-3-small-8k", "Heretic": "Yes", "DECCP": "N/A",
|
| 157 |
-
"FailSpy": "Partial", "ErisForge": "N/A"},
|
| 158 |
-
{"Model": "Qwen3-14B", "Heretic": "Yes", "DECCP": "N/A",
|
| 159 |
-
"FailSpy": "Partial", "ErisForge": "N/A"},
|
| 160 |
-
]
|
| 161 |
-
|
| 162 |
-
COVERAGE_TOTALS = {
|
| 163 |
-
"Heretic": "16/16 (100%)",
|
| 164 |
-
"DECCP": "11/16 (69%)",
|
| 165 |
-
"FailSpy": "5/16 (31%)",
|
| 166 |
-
"ErisForge": "9/16 (56%)",
|
| 167 |
-
}
|
| 168 |
|
| 169 |
# ---------------------------------------------------------------------------
|
| 170 |
# Helpers
|
|
|
|
| 4 |
A Cross-Architecture Evaluation" (arxiv:2512.13655) by Richard J. Young.
|
| 5 |
"""
|
| 6 |
|
| 7 |
+
import json
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
import gradio as gr
|
| 11 |
import pandas as pd
|
| 12 |
import plotly.express as px
|
|
|
|
| 71 |
"""
|
| 72 |
|
| 73 |
# ---------------------------------------------------------------------------
|
| 74 |
+
# Data (loaded from JSON files in data/ directory)
|
| 75 |
# ---------------------------------------------------------------------------
|
| 76 |
|
| 77 |
+
# Anchor the data directory to this module's location so lookups do not
# depend on the process's current working directory.
DATA_DIR = Path(__file__).parent.joinpath("data")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
+
|
| 80 |
+
def _load_json(name: str):
    """Parse and return the JSON document ``name`` from ``DATA_DIR``.

    Args:
        name: File name (e.g. ``"leaderboard.json"``) relative to ``DATA_DIR``.

    Returns:
        The decoded JSON value (a ``list`` or ``dict`` for the dashboard files).

    Raises:
        FileNotFoundError: If the file does not exist under ``DATA_DIR``.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; pin the encoding so non-ASCII labels
    # decode the same way regardless of the host's locale default.
    with open(DATA_DIR / name, encoding="utf-8") as f:
        return json.load(f)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
# Each dashboard table is read once, at import time, from its own JSON file.
LEADERBOARD_ROWS = _load_json("leaderboard.json")
CAPABILITY_DATA = _load_json("capabilities.json")
MEAN_CAPABILITY_CHANGE = _load_json("mean_capability_change.json")

# compatibility.json holds two sections: the per-model "rows" and the
# per-tool coverage "totals".
_compat = _load_json("compatibility.json")
COMPATIBILITY_ROWS, COVERAGE_TOTALS = _compat["rows"], _compat["totals"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
# ---------------------------------------------------------------------------
|
| 94 |
# Helpers
|
data/capabilities.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"DeepSeek-7B": [
|
| 3 |
+
{"Variant": "Base", "MMLU": 49.44, "GSM8K": 44.58, "HellaSwag": 77.84},
|
| 4 |
+
{"Variant": "Heretic", "MMLU": 48.95, "GSM8K": 40.11, "HellaSwag": 77.62},
|
| 5 |
+
{"Variant": "DECCP", "MMLU": 49.05, "GSM8K": 43.59, "HellaSwag": 77.99},
|
| 6 |
+
{"Variant": "ErisForge", "MMLU": 49.43, "GSM8K": 44.35, "HellaSwag": 77.69}
|
| 7 |
+
],
|
| 8 |
+
"Mistral-7B": [
|
| 9 |
+
{"Variant": "Base", "MMLU": 59.74, "GSM8K": 48.52, "HellaSwag": 83.28},
|
| 10 |
+
{"Variant": "Heretic", "MMLU": 59.46, "GSM8K": 48.37, "HellaSwag": 83.36},
|
| 11 |
+
{"Variant": "DECCP", "MMLU": 58.98, "GSM8K": 47.61, "HellaSwag": 83.12},
|
| 12 |
+
{"Variant": "ErisForge", "MMLU": 59.42, "GSM8K": 48.29, "HellaSwag": 83.35}
|
| 13 |
+
],
|
| 14 |
+
"Yi-1.5-9B": [
|
| 15 |
+
{"Variant": "Base", "MMLU": 68.02, "GSM8K": 70.89, "HellaSwag": 78.62},
|
| 16 |
+
{"Variant": "Heretic", "MMLU": 66.46, "GSM8K": 52.08, "HellaSwag": 77.08},
|
| 17 |
+
{"Variant": "DECCP", "MMLU": 67.33, "GSM8K": 72.40, "HellaSwag": 77.87},
|
| 18 |
+
{"Variant": "ErisForge", "MMLU": 67.99, "GSM8K": 70.51, "HellaSwag": 78.46}
|
| 19 |
+
],
|
| 20 |
+
"Zephyr-7B": [
|
| 21 |
+
{"Variant": "Heretic", "MMLU": 58.50, "GSM8K": 33.36, "HellaSwag": 82.90},
|
| 22 |
+
{"Variant": "DECCP", "MMLU": 58.28, "GSM8K": 33.21, "HellaSwag": 82.05}
|
| 23 |
+
]
|
| 24 |
+
}
|
data/compatibility.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"rows": [
|
| 3 |
+
{"Model": "Llama-3.1-8B", "Heretic": "Yes", "DECCP": "Yes", "FailSpy": "Yes", "ErisForge": "Yes"},
|
| 4 |
+
{"Model": "Mistral-7B-v0.3", "Heretic": "Yes", "DECCP": "Yes", "FailSpy": "Yes", "ErisForge": "Yes"},
|
| 5 |
+
{"Model": "Qwen2.5-7B", "Heretic": "Yes", "DECCP": "Yes", "FailSpy": "Yes", "ErisForge": "Yes"},
|
| 6 |
+
{"Model": "Gemma-2-9B", "Heretic": "Yes", "DECCP": "Yes", "FailSpy": "Yes", "ErisForge": "Yes"},
|
| 7 |
+
{"Model": "Gemma-7B", "Heretic": "Yes", "DECCP": "Yes", "FailSpy": "Yes", "ErisForge": "Yes"},
|
| 8 |
+
{"Model": "StableLM-2-12B", "Heretic": "Yes", "DECCP": "Yes", "FailSpy": "Partial", "ErisForge": "Yes"},
|
| 9 |
+
{"Model": "Yi-1.5-9B", "Heretic": "Yes", "DECCP": "Yes", "FailSpy": "Partial", "ErisForge": "Yes"},
|
| 10 |
+
{"Model": "Zephyr-7B-beta", "Heretic": "Yes", "DECCP": "Yes", "FailSpy": "Partial", "ErisForge": "Yes"},
|
| 11 |
+
{"Model": "DeepSeek-7B", "Heretic": "Yes", "DECCP": "Yes", "FailSpy": "Partial", "ErisForge": "Yes"},
|
| 12 |
+
{"Model": "OpenChat-3.5", "Heretic": "Yes", "DECCP": "Yes", "FailSpy": "Partial", "ErisForge": "No"},
|
| 13 |
+
{"Model": "Qwen3-8B", "Heretic": "Yes", "DECCP": "Yes", "FailSpy": "Partial", "ErisForge": "N/A"},
|
| 14 |
+
{"Model": "Vicuna-7B", "Heretic": "Yes", "DECCP": "N/A", "FailSpy": "Partial", "ErisForge": "No"},
|
| 15 |
+
{"Model": "InternLM2.5-7B", "Heretic": "Yes", "DECCP": "N/A", "FailSpy": "Partial", "ErisForge": "No"},
|
| 16 |
+
{"Model": "Falcon-Mamba-7B", "Heretic": "Yes", "DECCP": "Incompatible", "FailSpy": "Incompatible", "ErisForge": "Incompatible"},
|
| 17 |
+
{"Model": "Phi-3-small-8k", "Heretic": "Yes", "DECCP": "N/A", "FailSpy": "Partial", "ErisForge": "N/A"},
|
| 18 |
+
{"Model": "Qwen3-14B", "Heretic": "Yes", "DECCP": "N/A", "FailSpy": "Partial", "ErisForge": "N/A"}
|
| 19 |
+
],
|
| 20 |
+
"totals": {
|
| 21 |
+
"Heretic": "16/16 (100%)",
|
| 22 |
+
"DECCP": "11/16 (69%)",
|
| 23 |
+
"FailSpy": "5/16 (31%)",
|
| 24 |
+
"ErisForge": "9/16 (56%)"
|
| 25 |
+
}
|
| 26 |
+
}
|
data/leaderboard.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{"Model": "Zephyr-7B-beta", "Parameters": "7B", "Refusals (n=100)": 2, "KL Divergence": 0.076, "ASR (%)": 98, "ASR 95% CI": "93.0\u201399.4", "Time": "40m"},
|
| 3 |
+
{"Model": "DeepSeek-7B-chat", "Parameters": "7B", "Refusals (n=100)": 16, "KL Divergence": 0.043, "ASR (%)": 84, "ASR 95% CI": "75.6\u201389.9", "Time": "59m"},
|
| 4 |
+
{"Model": "Mistral-7B-v0.3", "Parameters": "7B", "Refusals (n=100)": 16, "KL Divergence": 0.317, "ASR (%)": 84, "ASR 95% CI": "75.6\u201389.9", "Time": "39m"},
|
| 5 |
+
{"Model": "Llama-3.1-8B", "Parameters": "8B", "Refusals (n=100)": 24, "KL Divergence": 0.056, "ASR (%)": 76, "ASR 95% CI": "66.8\u201383.3", "Time": "33m"},
|
| 6 |
+
{"Model": "Qwen3-8B", "Parameters": "8B", "Refusals (n=100)": 25, "KL Divergence": 0.210, "ASR (%)": 75, "ASR 95% CI": "65.7\u201382.5", "Time": "56m"},
|
| 7 |
+
{"Model": "Yi-1.5-9B", "Parameters": "9B", "Refusals (n=100)": 25, "KL Divergence": 0.248, "ASR (%)": 75, "ASR 95% CI": "65.7\u201382.5", "Time": "57m"},
|
| 8 |
+
{"Model": "Qwen2.5-7B", "Parameters": "7B", "Refusals (n=100)": 42, "KL Divergence": 1.646, "ASR (%)": 58, "ASR 95% CI": "48.2\u201367.2", "Time": "41m"},
|
| 9 |
+
{"Model": "StableLM-2-12B", "Parameters": "12B", "Refusals (n=100)": 54, "KL Divergence": 1.605, "ASR (%)": 46, "ASR 95% CI": "36.6\u201355.7", "Time": "109m"}
|
| 10 |
+
]
|
data/mean_capability_change.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{"Tool": "Heretic", "Avg MMLU \u0394 (pp)": -0.78, "Avg GSM8K \u0394 (pp)": -7.81, "Avg HellaSwag \u0394 (pp)": -0.56},
|
| 3 |
+
{"Tool": "DECCP", "Avg MMLU \u0394 (pp)": -0.61, "Avg GSM8K \u0394 (pp)": -0.13, "Avg HellaSwag \u0394 (pp)": -0.25},
|
| 4 |
+
{"Tool": "ErisForge", "Avg MMLU \u0394 (pp)": -0.12, "Avg GSM8K \u0394 (pp)": -0.28, "Avg HellaSwag \u0394 (pp)": -0.08}
|
| 5 |
+
]
|