import React, { useState, useEffect } from 'react'; const BenchmarkChart = () => { // Real data from your CSV const benchmarkData = [ { model: "Claude 4 Sonnet", direct_conversation: 26.33, keyword_objective_combined: 3.13 }, { model: "Claude Opus 4.1", direct_conversation: 20.67, keyword_objective_combined: 3.65 }, { model: "Deepseek R1-0528", direct_conversation: 68.67, keyword_objective_combined: 48.18 }, { model: "GPT 5", direct_conversation: 8.33, keyword_objective_combined: 3.65, bio_topic_change: 23.5, enhancement: 10, root_problem: 4.5 }, { model: "GPT 5 mini", direct_conversation: 7.67, keyword_objective_combined: 3.91, bio_topic_change: 14.5, enhancement: 5.5, root_problem: 3 }, { model: "GPT o3", direct_conversation: 22, keyword_objective_combined: 10.94 }, { model: "Gemini 2.5 Pro", direct_conversation: 55.67, keyword_objective_combined: 41.67, bio_topic_change: 53.5, enhancement: 47, root_problem: 26 }, { model: "Grok 4", direct_conversation: 68.67, keyword_objective_combined: 52.6 }, { model: "Llama 3.1 405B", direct_conversation: 67, keyword_objective_combined: 41.67 } ]; const [currentPhase, setCurrentPhase] = useState('baseline'); const [currentMethodIndex, setCurrentMethodIndex] = useState(0); const synthesisMethodsOrder = ['keyword_objective_combined', 'bio_topic_change', 'enhancement', 'root_problem']; const phases = [ { key: 'baseline', label: 'Direct Conversation (Baseline)' }, { key: 'additive_synthesis', label: 'Adding Synthesis Methods' } ]; useEffect(() => { const interval = setInterval(() => { setCurrentPhase(prev => prev === 'baseline' ? 'additive_synthesis' : 'baseline'); setCurrentMethodIndex(0); // Reset when switching phases }, 12000); // Increased total cycle time to 12 seconds return () => clearInterval(interval); }, []); useEffect(() => { if (currentPhase === 'additive_synthesis') { const methodInterval = setInterval(() => { setCurrentMethodIndex(prev => { const nextIndex = prev + 1; // Stay at final state (all methods added) for longer if (nextIndex > synthesisMethodsOrder.length) { return synthesisMethodsOrder.length; // Stay at max for longer } return nextIndex; }); }, 2000); // Slower progression - 2 seconds per method return () => clearInterval(methodInterval); } }, [currentPhase]); const getCurrentValue = (modelData, phase) => { if (phase === 'baseline') { return modelData.direct_conversation || 0; } else if (phase === 'additive_synthesis') { let cumulativeValue = modelData.direct_conversation || 0; // Add each synthesis method's contribution up to currentMethodIndex for (let i = 0; i < currentMethodIndex; i++) { const method = synthesisMethodsOrder[i]; if (modelData[method] !== undefined) { cumulativeValue += modelData[method]; } } return cumulativeValue; } return 0; }; const getCurrentMethodsAdded = (modelData, phase) => { if (phase === 'baseline') return ['Direct Conversation']; const methods = ['Direct Conversation']; for (let i = 0; i < currentMethodIndex; i++) { const method = synthesisMethodsOrder[i]; if (modelData[method] !== undefined) { methods.push(method.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase())); } } return methods; }; const getBarColor = (modelData, phase) => { if (phase === 'baseline') { return 'from-blue-500 to-blue-600'; } else { // Green gradient for additive synthesis return 'from-green-500 to-green-600'; } }; return (
{/* Header */}

LLM Safety Benchmark Results

SafetyBench Aug 2025 - Success Rate Comparison

{/* Methodology Disclaimer */}
⚠️

Methodology Note

Additive Visualization: This chart shows cumulative impact by progressively adding each synthesis method's individual success rate. Values >100% represent theoretical maximum vulnerability discovery when combining multiple attack vectors. Results are based on SafetyBench Aug 2025 testing methodology and should be interpreted as relative performance indicators.

Current View: {phases.find(p => p.key === currentPhase)?.label}

{currentPhase === 'additive_synthesis' && currentMethodIndex > 0 && (

Adding Method {currentMethodIndex}: {synthesisMethodsOrder[currentMethodIndex - 1]?.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase())}

)}
{/* Chart Container */}
{benchmarkData.map((modelData, index) => { const currentValue = getCurrentValue(modelData, currentPhase); const baselineValue = modelData.direct_conversation; const maxValue = 100; // Increased max scale since we're adding values const barWidth = (currentValue / maxValue) * 100; const methodsAdded = getCurrentMethodsAdded(modelData, currentPhase); const totalGain = currentValue - baselineValue; return (
{/* Model Name and Methods */}

{modelData.model}

{methodsAdded.join(' + ')}

{currentValue.toFixed(1)}% {currentPhase === 'additive_synthesis' && totalGain > 0 && (
+{totalGain.toFixed(1)}% total gain
)}
{/* Progress Bar */}
{currentValue > 8 ? `${currentValue.toFixed(1)}%` : ''}
{/* Method Breakdown */} {currentPhase === 'additive_synthesis' && currentMethodIndex > 0 && (
Baseline: {baselineValue.toFixed(1)}%
{synthesisMethodsOrder.slice(0, currentMethodIndex).map(method => { if (modelData[method] !== undefined) { return (
+ {method.replace(/_/g, ' ')}: {modelData[method].toFixed(1)}%
); } return null; })}
)}
); })}
{/* Legend */}
Baseline (Direct Conversation)
Additive Synthesis Methods
{/* Footer Info */}

Animation cycles every 12 seconds: baseline (6s) → progressive method addition (2s each) → final results (4s)

Data from SafetyBench Aug 2025 • Synthesis methods test different attack vectors

Interpretation: Each synthesis method represents a different approach to testing model vulnerabilities

Additive Display: Shows theoretical maximum impact when combining all available synthesis techniques

Baseline: Direct conversation represents standard prompting without augmentation techniques

); }; export default BenchmarkChart;