yoshinakachi committed on
Commit
2607058
·
verified ·
1 Parent(s): c9c02ef

Update src/App.js

Browse files
Files changed (1) hide show
  1. src/App.js +270 -20
src/App.js CHANGED
@@ -1,25 +1,275 @@
1
- import logo from './logo.svg';
2
- import './App.css';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- function App() {
5
  return (
6
- <div className="App">
7
- <header className="App-header">
8
- <img src={logo} className="App-logo" alt="logo" />
9
- <p>
10
- Edit <code>src/App.js</code> and save to reload.
11
- </p>
12
- <a
13
- className="App-link"
14
- href="https://reactjs.org"
15
- target="_blank"
16
- rel="noopener noreferrer"
17
- >
18
- Learn React
19
- </a>
20
- </header>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  </div>
22
  );
23
- }
24
 
25
- export default App;
 
1
+ import React, { useState, useEffect } from 'react';
2
+
3
+ const BenchmarkChart = () => {
4
+ // Real data from your CSV
5
+ const benchmarkData = [
6
+ {
7
+ model: "Claude 4 Sonnet",
8
+ direct_conversation: 26.33,
9
+ keyword_objective_combined: 3.13
10
+ },
11
+ {
12
+ model: "Claude Opus 4.1",
13
+ direct_conversation: 20.67,
14
+ keyword_objective_combined: 3.65
15
+ },
16
+ {
17
+ model: "Deepseek R1-0528",
18
+ direct_conversation: 68.67,
19
+ keyword_objective_combined: 48.18
20
+ },
21
+ {
22
+ model: "GPT 5",
23
+ direct_conversation: 8.33,
24
+ keyword_objective_combined: 3.65,
25
+ bio_topic_change: 23.5,
26
+ enhancement: 10,
27
+ root_problem: 4.5
28
+ },
29
+ {
30
+ model: "GPT 5 mini",
31
+ direct_conversation: 7.67,
32
+ keyword_objective_combined: 3.91,
33
+ bio_topic_change: 14.5,
34
+ enhancement: 5.5,
35
+ root_problem: 3
36
+ },
37
+ {
38
+ model: "GPT o3",
39
+ direct_conversation: 22,
40
+ keyword_objective_combined: 10.94
41
+ },
42
+ {
43
+ model: "Gemini 2.5 Pro",
44
+ direct_conversation: 55.67,
45
+ keyword_objective_combined: 41.67,
46
+ bio_topic_change: 53.5,
47
+ enhancement: 47,
48
+ root_problem: 26
49
+ },
50
+ {
51
+ model: "Grok 4",
52
+ direct_conversation: 68.67,
53
+ keyword_objective_combined: 52.6
54
+ },
55
+ {
56
+ model: "Llama 3.1 405B",
57
+ direct_conversation: 67,
58
+ keyword_objective_combined: 41.67
59
+ }
60
+ ];
61
+
62
+ const [currentPhase, setCurrentPhase] = useState('baseline');
63
+ const [currentMethodIndex, setCurrentMethodIndex] = useState(0);
64
+
65
+ const synthesisMethodsOrder = ['keyword_objective_combined', 'bio_topic_change', 'enhancement', 'root_problem'];
66
+
67
+ const phases = [
68
+ { key: 'baseline', label: 'Direct Conversation (Baseline)' },
69
+ { key: 'additive_synthesis', label: 'Adding Synthesis Methods' }
70
+ ];
71
+
72
+ useEffect(() => {
73
+ const interval = setInterval(() => {
74
+ setCurrentPhase(prev => prev === 'baseline' ? 'additive_synthesis' : 'baseline');
75
+ setCurrentMethodIndex(0); // Reset when switching phases
76
+ }, 12000); // Increased total cycle time to 12 seconds
77
+
78
+ return () => clearInterval(interval);
79
+ }, []);
80
+
81
+ useEffect(() => {
82
+ if (currentPhase === 'additive_synthesis') {
83
+ const methodInterval = setInterval(() => {
84
+ setCurrentMethodIndex(prev => {
85
+ const nextIndex = prev + 1;
86
+ // Stay at final state (all methods added) for longer
87
+ if (nextIndex > synthesisMethodsOrder.length) {
88
+ return synthesisMethodsOrder.length; // Stay at max for longer
89
+ }
90
+ return nextIndex;
91
+ });
92
+ }, 2000); // Slower progression - 2 seconds per method
93
+ return () => clearInterval(methodInterval);
94
+ }
95
+ }, [currentPhase]);
96
+
97
+ const getCurrentValue = (modelData, phase) => {
98
+ if (phase === 'baseline') {
99
+ return modelData.direct_conversation || 0;
100
+ } else if (phase === 'additive_synthesis') {
101
+ let cumulativeValue = modelData.direct_conversation || 0;
102
+
103
+ // Add each synthesis method's contribution up to currentMethodIndex
104
+ for (let i = 0; i < currentMethodIndex; i++) {
105
+ const method = synthesisMethodsOrder[i];
106
+ if (modelData[method] !== undefined) {
107
+ cumulativeValue += modelData[method];
108
+ }
109
+ }
110
+
111
+ return cumulativeValue;
112
+ }
113
+ return 0;
114
+ };
115
+
116
+ const getCurrentMethodsAdded = (modelData, phase) => {
117
+ if (phase === 'baseline') return ['Direct Conversation'];
118
+
119
+ const methods = ['Direct Conversation'];
120
+ for (let i = 0; i < currentMethodIndex; i++) {
121
+ const method = synthesisMethodsOrder[i];
122
+ if (modelData[method] !== undefined) {
123
+ methods.push(method.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase()));
124
+ }
125
+ }
126
+ return methods;
127
+ };
128
+
129
+ const getBarColor = (modelData, phase) => {
130
+ if (phase === 'baseline') {
131
+ return 'from-blue-500 to-blue-600';
132
+ } else {
133
+ // Green gradient for additive synthesis
134
+ return 'from-green-500 to-green-600';
135
+ }
136
+ };
137
 
 
138
  return (
139
+ <div className="min-h-screen bg-gradient-to-br from-slate-900 to-slate-800 p-8">
140
+ <div className="max-w-6xl mx-auto">
141
+ {/* Header */}
142
+ <div className="text-center mb-12">
143
+ <h1 className="text-4xl font-bold text-white mb-4">
144
+ LLM Safety Benchmark Results
145
+ </h1>
146
+ <p className="text-slate-300 text-lg">
147
+ SafetyBench Aug 2025 - Success Rate Comparison
148
+ </p>
149
+
150
+ {/* Methodology Disclaimer */}
151
+ <div className="mt-6 p-4 bg-yellow-900/30 border border-yellow-500/30 rounded-lg max-w-4xl mx-auto">
152
+ <div className="flex items-start space-x-3">
153
+ <div className="text-yellow-400 mt-1">⚠️</div>
154
+ <div className="text-left">
155
+ <p className="text-yellow-200 font-semibold mb-2">Methodology Note</p>
156
+ <p className="text-yellow-100 text-sm leading-relaxed">
157
+ <strong>Additive Visualization:</strong> This chart shows cumulative impact by progressively adding each synthesis method's individual success rate.
158
+ Values >100% represent theoretical maximum vulnerability discovery when combining multiple attack vectors.
159
+ Results are based on SafetyBench Aug 2025 testing methodology and should be interpreted as relative performance indicators.
160
+ </p>
161
+ </div>
162
+ </div>
163
+ </div>
164
+
165
+ <div className="mt-4 p-4 bg-slate-800 rounded-lg inline-block">
166
+ <p className="text-white font-semibold">
167
+ Current View: {phases.find(p => p.key === currentPhase)?.label}
168
+ </p>
169
+ {currentPhase === 'additive_synthesis' && currentMethodIndex > 0 && (
170
+ <p className="text-slate-300 text-sm mt-1">
171
+ Adding Method {currentMethodIndex}: {synthesisMethodsOrder[currentMethodIndex - 1]?.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase())}
172
+ </p>
173
+ )}
174
+ </div>
175
+ </div>
176
+
177
+ {/* Chart Container */}
178
+ <div className="bg-white rounded-2xl shadow-2xl p-8">
179
+ <div className="space-y-6">
180
+ {benchmarkData.map((modelData, index) => {
181
+ const currentValue = getCurrentValue(modelData, currentPhase);
182
+ const baselineValue = modelData.direct_conversation;
183
+ const maxValue = 100; // Increased max scale since we're adding values
184
+ const barWidth = (currentValue / maxValue) * 100;
185
+ const methodsAdded = getCurrentMethodsAdded(modelData, currentPhase);
186
+ const totalGain = currentValue - baselineValue;
187
+
188
+ return (
189
+ <div key={modelData.model} className="relative">
190
+ {/* Model Name and Methods */}
191
+ <div className="flex items-center justify-between mb-2">
192
+ <div>
193
+ <h3 className="font-semibold text-gray-800 text-lg">
194
+ {modelData.model}
195
+ </h3>
196
+ <p className="text-sm text-gray-600">
197
+ {methodsAdded.join(' + ')}
198
+ </p>
199
+ </div>
200
+ <div className="text-right">
201
+ <span className="text-2xl font-bold text-gray-700">
202
+ {currentValue.toFixed(1)}%
203
+ </span>
204
+ {currentPhase === 'additive_synthesis' && totalGain > 0 && (
205
+ <div className="text-sm font-semibold text-green-600">
206
+ +{totalGain.toFixed(1)}% total gain
207
+ </div>
208
+ )}
209
+ </div>
210
+ </div>
211
+
212
+ {/* Progress Bar */}
213
+ <div className="relative h-12 bg-gray-200 rounded-full overflow-hidden">
214
+ <div
215
+ className={`h-full bg-gradient-to-r ${getBarColor(modelData, currentPhase)} rounded-full transition-all duration-[1800ms] ease-in-out flex items-center justify-end pr-4`}
216
+ style={{ width: `${Math.max(barWidth, 5)}%` }}
217
+ >
218
+ <div className="text-white font-semibold text-sm">
219
+ {currentValue > 8 ? `${currentValue.toFixed(1)}%` : ''}
220
+ </div>
221
+ </div>
222
+ </div>
223
+
224
+ {/* Method Breakdown */}
225
+ {currentPhase === 'additive_synthesis' && currentMethodIndex > 0 && (
226
+ <div className="mt-2 text-xs text-gray-500 space-y-1">
227
+ <div>Baseline: {baselineValue.toFixed(1)}%</div>
228
+ {synthesisMethodsOrder.slice(0, currentMethodIndex).map(method => {
229
+ if (modelData[method] !== undefined) {
230
+ return (
231
+ <div key={method}>
232
+ + {method.replace(/_/g, ' ')}: {modelData[method].toFixed(1)}%
233
+ </div>
234
+ );
235
+ }
236
+ return null;
237
+ })}
238
+ </div>
239
+ )}
240
+ </div>
241
+ );
242
+ })}
243
+ </div>
244
+
245
+ {/* Legend */}
246
+ <div className="mt-8 flex justify-center space-x-6 flex-wrap">
247
+ <div className="flex items-center space-x-2">
248
+ <div className="w-4 h-4 bg-gradient-to-r from-blue-500 to-blue-600 rounded"></div>
249
+ <span className="text-gray-700">Baseline (Direct Conversation)</span>
250
+ </div>
251
+ <div className="flex items-center space-x-2">
252
+ <div className="w-4 h-4 bg-gradient-to-r from-green-500 to-green-600 rounded"></div>
253
+ <span className="text-gray-700">Additive Synthesis Methods</span>
254
+ </div>
255
+ </div>
256
+ </div>
257
+
258
+ {/* Footer Info */}
259
+ <div className="mt-8 text-center text-slate-400 space-y-2">
260
+ <p>Animation cycles every 12 seconds: baseline (6s) → progressive method addition (2s each) → final results (4s)</p>
261
+ <p className="text-sm">
262
+ Data from SafetyBench Aug 2025 • Synthesis methods test different attack vectors
263
+ </p>
264
+ <div className="text-xs mt-4 max-w-4xl mx-auto space-y-1">
265
+ <p><strong>Interpretation:</strong> Each synthesis method represents a different approach to testing model vulnerabilities</p>
266
+ <p><strong>Additive Display:</strong> Shows theoretical maximum impact when combining all available synthesis techniques</p>
267
+ <p><strong>Baseline:</strong> Direct conversation represents standard prompting without augmentation techniques</p>
268
+ </div>
269
+ </div>
270
+ </div>
271
  </div>
272
  );
273
+ };
274
 
275
+ export default BenchmarkChart;