import React, { useState, useEffect } from 'react'; const BenchmarkChart = () => { // Real data sorted by Human Baseline ASR (highest to lowest) const benchmarkData = [ { model: "Grok 4", baseline: 68.67, methods: { keyword_objective_combined: 85.15 } }, { model: "Deepseek R1-0528", baseline: 68.67, methods: { keyword_objective_combined: 83.76 } }, { model: "Gemini 2.5 Pro", baseline: 55.67, methods: { keyword_objective_combined: 74.14, root_problem: 67.19 } }, { model: "Mixtral 8x22B", baseline: 48.00, methods: { keyword_objective_combined: 66.82 } }, { model: "Llama 4 Maverick Instruct", baseline: 45.00, methods: { keyword_objective_combined: 56.46 } }, { model: "Gemini 3", baseline: 32.50, methods: { bio_topic_change: 53.68 } }, { model: "Claude 4 Sonnet", baseline: 26.33, methods: { keyword_objective_combined: 28.64 } }, { model: "GPT o3", baseline: 22.00, methods: { keyword_objective_combined: 30.53 } }, { model: "Claude Opus 4.1", baseline: 20.67, methods: { keyword_objective_combined: 23.56 } }, { model: "GPT 5", baseline: 8.33, methods: { keyword_objective_combined: 11.68, root_problem: 12.46 } } ]; const [currentPhase, setCurrentPhase] = useState('baseline'); const getCurrentValue = (modelData, phase) => { if (phase === 'baseline') { return modelData.baseline; } else if (phase === 'transformation') { // Return the maximum value from all available transformation methods let maxASR = modelData.baseline; Object.values(modelData.methods).forEach(value => { maxASR = Math.max(maxASR, value); }); return maxASR; } return 0; }; const handleToggle = () => { setCurrentPhase(currentPhase === 'baseline' ? 'transformation' : 'baseline'); }; return (

{/* Header */}

JulyAI Safety Benchmark For Frontier Models

Attack Success Rate Analysis with Transformation Methods

{/* Control Button */}

{/* Chart Container - Longer */}

{/* Legend - At Top of Chart */}

Human Baseline

Transformation Extension

{benchmarkData.map((modelData, index) => { const currentValue = getCurrentValue(modelData, currentPhase); const baselineValue = modelData.baseline; const maxValue = 90; const baselineWidth = (baselineValue / maxValue) * 100; const totalWidth = (currentValue / maxValue) * 100; const extensionWidth = totalWidth - baselineWidth; const gain = currentValue - baselineValue; return (

{/* Model Name and Value - Cleaner Layout */}

{modelData.model}

{gain > 0 && ( +{gain.toFixed(1)} )} {currentValue.toFixed(1)}%

{/* Progress Bar - Shows Growth from Baseline */}

{/* Baseline Bar (Blue) - Flat, no rounding for seamless extension */}

{/* Extension Bar (Green) - Only rounded at the end */}

); })}

{/* Methodology Note - Moved Below Chart */}

⚠️

Methodology Note

Additive Visualization: This chart shows cumulative impact by progressively adding each transformation method's individual attack success rate. Values >100% represent transformation of multiple conversations off one failed, human seed conversation. Results are based on HarmBench Grading methodology and should be interpreted as relative performance indicators.

{/* Footer */}

Sorted by Human Baseline ASR (highest to lowest) • Click button above for manual control

Bars extend from baseline to show transformation method impact

); }; export default BenchmarkChart;