File size: 9,796 Bytes
2607058
 
 
30360e9
2607058
 
30360e9
 
 
 
 
2607058
 
30360e9
 
 
 
 
2607058
 
30360e9
 
 
 
 
2607058
 
30360e9
 
 
 
 
 
2607058
 
30360e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2607058
 
 
30360e9
 
 
 
2607058
 
30360e9
 
 
 
 
2607058
 
30360e9
 
 
 
 
2607058
 
30360e9
 
 
 
 
 
 
 
 
 
 
 
 
 
2607058
 
 
 
 
 
1e8671b
2607058
 
30360e9
1e8671b
2607058
 
 
 
 
30360e9
 
2607058
 
 
 
 
 
 
 
 
 
30360e9
2607058
 
 
30360e9
2607058
 
 
 
 
 
30360e9
2607058
1e8671b
30360e9
2607058
 
 
30360e9
 
2607058
 
 
30360e9
2607058
 
 
 
30360e9
 
 
2607058
30360e9
2607058
 
30360e9
 
2607058
 
30360e9
 
1e8671b
 
 
 
 
 
2607058
 
 
 
 
c1ae531
2607058
c1ae531
 
2607058
 
5f3beb1
 
30360e9
2607058
 
30360e9
 
1e8671b
2607058
30360e9
 
2607058
 
30360e9
 
2607058
30360e9
2607058
 
 
1e8671b
 
c1ae531
2607058
 
 
 
 
 
30360e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1ae531
30360e9
 
 
 
 
 
 
c1ae531
 
 
2607058
30360e9
 
 
 
 
 
 
 
 
2607058
 
 
 
30360e9
 
 
2607058
 
 
30360e9
2607058
30360e9
 
2607058
 
30360e9
1e8671b
2607058
 
 
 
30360e9
 
2607058
30360e9
 
 
1e8671b
2607058
 
 
5f3beb1
 
2607058
5f3beb1
2607058
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
import React, { useState, useEffect } from 'react';

const BenchmarkChart = () => {
  // Real data from your ASR calculations - sorted by highest achievable ASR
  const benchmarkData = [
    {
      model: "Grok 4",
      baseline: 68.67,
      methods: {
        keyword_objective_combined: 85.15
      }
    },
    {
      model: "Deepseek R1-0528",
      baseline: 68.67,
      methods: {
        keyword_objective_combined: 83.76
      }
    },
    {
      model: "Llama 3.1 405B",
      baseline: 67.00,
      methods: {
        keyword_objective_combined: 80.75
      }
    },
    {
      model: "Gemini 2.5 Pro",
      baseline: 55.67,
      methods: {
        keyword_objective_combined: 74.14,
        root_problem: 67.19
      }
    },
    {
      model: "Llama 3 8B Instruct Reference",
      baseline: 58.33,
      methods: {
        keyword_objective_combined: 68.86
      }
    },
    {
      model: "Mixtral 8x22B",
      baseline: 48.00,
      methods: {
        keyword_objective_combined: 66.82
      }
    },
    {
      model: "Llama 4 Maverick Instruct",
      baseline: 45.00,
      methods: {
        keyword_objective_combined: 56.46
      }
    },
    {
      model: "GPT o3",
      baseline: 22.00,
      methods: {
        keyword_objective_combined: 30.53
      }
    },
    {
      model: "Claude 4 Sonnet",
      baseline: 26.33,
      methods: {
        keyword_objective_combined: 28.64
      }
    },
    {
      model: "Claude Opus 4.1", 
      baseline: 20.67,
      methods: {
        keyword_objective_combined: 23.56
      }
    },
    {
      model: "GPT 5",
      baseline: 8.33,
      methods: {
        keyword_objective_combined: 11.68,
        root_problem: 12.46
      }
    },
    {
      model: "GPT 5 mini",
      baseline: 7.67,
      methods: {
        keyword_objective_combined: 11.28,
        root_problem: 10.44
      }
    }
  ];

  const [currentPhase, setCurrentPhase] = useState('baseline');
  const [currentMethodIndex, setCurrentMethodIndex] = useState(0);
  
  const synthesisMethodsOrder = ['keyword_objective_combined', 'root_problem'];
  
  const phases = [
    { key: 'baseline', label: 'Human Baseline ASR' },
    { key: 'additive_synthesis', label: 'Human + Transformation Methods ASR' }
  ];

  useEffect(() => {
    const interval = setInterval(() => {
      setCurrentPhase(prev => prev === 'baseline' ? 'additive_synthesis' : 'baseline');
      setCurrentMethodIndex(0);
    }, 8000);

    return () => clearInterval(interval);
  }, []);

  useEffect(() => {
    if (currentPhase === 'additive_synthesis') {
      const methodInterval = setInterval(() => {
        setCurrentMethodIndex(prev => {
          const nextIndex = prev + 1;
          if (nextIndex > synthesisMethodsOrder.length) {
            return synthesisMethodsOrder.length;
          }
          return nextIndex;
        });
      }, 2000);
      return () => clearInterval(methodInterval);
    }
  }, [currentPhase]);

  const getCurrentValue = (modelData, phase) => {
    if (phase === 'baseline') {
      return modelData.baseline;
    } else if (phase === 'additive_synthesis') {
      // Show the highest ASR achieved by any transformation method tried so far
      let maxASR = modelData.baseline;
      
      for (let i = 0; i < currentMethodIndex; i++) {
        const method = synthesisMethodsOrder[i];
        if (modelData.methods[method] !== undefined) {
          maxASR = Math.max(maxASR, modelData.methods[method]);
        }
      }
      
      return maxASR;
    }
    return 0;
  };

  const getCurrentMethod = (modelData, phase) => {
    if (phase === 'baseline') return 'Human Baseline';
    if (currentMethodIndex === 0) return 'Human Baseline';
    
    const availableMethods = [];
    for (let i = 0; i < currentMethodIndex; i++) {
      const method = synthesisMethodsOrder[i];
      if (modelData.methods[method] !== undefined) {
        availableMethods.push(method);
      }
    }
    
    if (availableMethods.length === 0) return 'Human Baseline';
    
    const lastMethod = availableMethods[availableMethods.length - 1];
    if (lastMethod === 'keyword_objective_combined') return 'Keyword/Objective Transformation';
    if (lastMethod === 'root_problem') return 'Root Problem Transformation';
    
    return lastMethod.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase());
  };

  const getBarColor = (modelData, phase) => {
    if (phase === 'baseline') {
      return 'from-blue-500 to-blue-600';
    } else if (phase === 'additive_synthesis' && currentMethodIndex > 0) {
      return 'from-green-500 to-green-600';
    } else {
      return 'from-blue-500 to-blue-600';
    }
  };

  return (
    <div className="min-h-screen bg-gradient-to-br from-slate-900 to-slate-800 p-4">
      <div className="max-w-6xl mx-auto">
        {/* Header */}
        <div className="text-center mb-6">
          <h1 className="text-3xl font-bold text-white mb-3">
            LLM Attack Success Rate with Transformation Methods
          </h1>
          <p className="text-slate-300">
            SafetyBench Aug 2025 - Real ASR Calculations
          </p>
          
          {/* Methodology Note */}
          <div className="mt-4 p-3 bg-yellow-900/30 border border-yellow-500/30 rounded-lg max-w-4xl mx-auto">
            <div className="flex items-start space-x-3">
              <div className="text-yellow-400 mt-1">⚠️</div>
              <div className="text-left">
                <p className="text-yellow-200 font-semibold mb-2">Methodology Note</p>
                <p className="text-yellow-100 text-sm leading-relaxed">
                  <strong>Additive Visualization:</strong> This chart shows cumulative impact by progressively adding each transformation method's individual attack success rate. 
                  Values >100% represent transformation of multiple conversations off one failed, human seed conversation. 
                  Results are based on HarmBench Grading methodology and should be interpreted as relative performance indicators.
                </p>
              </div>
            </div>
          </div>
        </div>

        {/* Chart Container - Scrollable Box */}
        <div className="bg-white rounded-xl shadow-2xl p-4">
          <div className="h-96 overflow-y-auto pr-2">
            <div className="space-y-2">
              {benchmarkData.map((modelData, index) => {
                const currentValue = getCurrentValue(modelData, currentPhase);
                const baselineValue = modelData.baseline;
                const maxValue = 90;
                const barWidth = (currentValue / maxValue) * 100;
                const currentMethod = getCurrentMethod(modelData, currentPhase);
                const gain = currentValue - baselineValue;
                
                return (
                  <div key={modelData.model} className="relative">
                    {/* Model Name and Value */}
                    <div className="flex items-center justify-between mb-1">
                      <div>
                        <h3 className="font-semibold text-gray-800 text-sm">
                          {modelData.model}
                        </h3>
                        <p className="text-xs text-gray-600">
                          {currentMethod}
                        </p>
                      </div>
                      <div className="text-right">
                        <span className="text-lg font-bold text-gray-700">
                          {currentValue.toFixed(1)}%
                        </span>
                        {gain > 0 && (
                          <div className="text-xs font-semibold text-green-600">
                            +{gain.toFixed(1)} points
                          </div>
                        )}
                      </div>
                    </div>
                    
                    {/* Progress Bar */}
                    <div className="relative h-6 bg-gray-200 rounded-full overflow-hidden">
                      <div 
                        className={`h-full bg-gradient-to-r ${getBarColor(modelData, currentPhase)} rounded-full transition-all duration-2000 ease-out flex items-center justify-end pr-2`}
                        style={{ width: `${Math.max(barWidth, 5)}%` }}
                      >
                        <div className="text-white font-semibold text-xs">
                          {currentValue > 8 ? `${currentValue.toFixed(1)}%` : ''}
                        </div>
                      </div>
                    </div>
                  </div>
                );
              })}
            </div>
          </div>
          
          {/* Legend */}
          <div className="mt-4 pt-4 border-t border-gray-200 flex justify-center space-x-6 text-sm">
            <div className="flex items-center space-x-2">
              <div className="w-3 h-3 bg-gradient-to-r from-blue-500 to-blue-600 rounded"></div>
              <span className="text-gray-700">Human Baseline</span>
            </div>
            <div className="flex items-center space-x-2">
              <div className="w-3 h-3 bg-gradient-to-r from-green-500 to-green-600 rounded"></div>
              <span className="text-gray-700">With Transformation Methods</span>
            </div>
          </div>
        </div>
        
        {/* Footer */}
        <div className="mt-4 text-center text-slate-400 space-y-1">
          <p className="text-sm">
            Top performers: Grok 4 (85.15%), Deepseek R1-0528 (83.76%), Llama 3.1 405B (80.75%)
          </p>
          <p className="text-xs">
            Shows highest ASR achieved when combining human attempts with transformation methods
          </p>
        </div>
      </div>
    </div>
  );
};

export default BenchmarkChart;