import React, { useState, useEffect } from 'react'; const BenchmarkChart = () => { // Real data sorted by Human Baseline ASR (highest to lowest) const benchmarkData = [ { model: "Grok 4", baseline: 68.67, methods: { keyword_objective_combined: 85.15 } }, { model: "Deepseek R1-0528", baseline: 68.67, methods: { keyword_objective_combined: 83.76 } }, { model: "Gemini 2.5 Pro", baseline: 55.67, methods: { keyword_objective_combined: 74.14, root_problem: 67.19 } }, { model: "Gemini 3", baseline: 55.67, methods: { keyword_objective_combined: 55.67, } }, { model: "Mixtral 8x22B", baseline: 48.00, methods: { keyword_objective_combined: 66.82 } }, { model: "Llama 4 Maverick Instruct", baseline: 45.00, methods: { keyword_objective_combined: 56.46 } }, { model: "Claude 4 Sonnet", baseline: 26.33, methods: { keyword_objective_combined: 28.64 } }, { model: "GPT o3", baseline: 22.00, methods: { keyword_objective_combined: 30.53 } }, { model: "Claude Opus 4.1", baseline: 20.67, methods: { keyword_objective_combined: 23.56 } }, { model: "GPT 5", baseline: 8.33, methods: { keyword_objective_combined: 11.68, root_problem: 12.46 } } ]; const [currentPhase, setCurrentPhase] = useState('baseline'); const [currentMethodIndex, setCurrentMethodIndex] = useState(0); const synthesisMethodsOrder = ['keyword_objective_combined', 'root_problem']; const getCurrentValue = (modelData, phase) => { if (phase === 'baseline') { return modelData.baseline; } else if (phase === 'additive_synthesis') { let maxASR = modelData.baseline; for (let i = 0; i < currentMethodIndex; i++) { const method = synthesisMethodsOrder[i]; if (modelData.methods[method] !== undefined) { maxASR = Math.max(maxASR, modelData.methods[method]); } } return maxASR; } return 0; }; const getCurrentMethod = (modelData, phase) => { if (phase === 'baseline') return 'Human Baseline'; if (currentMethodIndex === 0) return 'Human Baseline'; const availableMethods = []; for (let i = 0; i < currentMethodIndex; i++) { const method = synthesisMethodsOrder[i]; if (modelData.methods[method] !== undefined) { availableMethods.push(method); } } if (availableMethods.length === 0) return 'Human Baseline'; const lastMethod = availableMethods[availableMethods.length - 1]; let transformationName; if (lastMethod === 'keyword_objective_combined') transformationName = 'Keyword/Objective Transformation'; else if (lastMethod === 'root_problem') transformationName = 'Root Problem Transformation'; else transformationName = lastMethod.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase()); return `Human Baseline + ${transformationName}`; }; const handleManualAnimate = () => { if (currentPhase === 'baseline') { setCurrentPhase('additive_synthesis'); setCurrentMethodIndex(0); // Manually step through methods setTimeout(() => setCurrentMethodIndex(1), 500); setTimeout(() => setCurrentMethodIndex(2), 2500); } else { setCurrentPhase('baseline'); setCurrentMethodIndex(0); } }; return (
Attack Success Rate Analysis with Transformation Methods
{/* Control Button */}Methodology Note
Additive Visualization: This chart shows cumulative impact by progressively adding each transformation method's individual attack success rate. Values >100% represent transformation of multiple conversations off one failed, human seed conversation. Results are based on HarmBench Grading methodology and should be interpreted as relative performance indicators.
Sorted by Human Baseline ASR (highest to lowest) • Click button above for manual control
Bars extend from baseline to show transformation method impact