Spaces:
Running
Running
Update src/App.js
Browse files - src/App.js +43 -43
src/App.js
CHANGED
|
@@ -1,93 +1,93 @@
|
|
| 1 |
import React, { useState, useEffect } from 'react';
|
| 2 |
|
| 3 |
const BenchmarkChart = () => {
|
| 4 |
-
// Real data sorted by
|
| 5 |
const benchmarkData = [
|
| 6 |
{
|
| 7 |
-
model: "
|
| 8 |
-
baseline:
|
| 9 |
methods: {
|
| 10 |
-
keyword_objective_combined:
|
| 11 |
-
root_problem: 10.44
|
| 12 |
}
|
| 13 |
},
|
| 14 |
{
|
| 15 |
-
model: "
|
| 16 |
-
baseline:
|
| 17 |
methods: {
|
| 18 |
-
keyword_objective_combined:
|
| 19 |
-
root_problem: 12.46
|
| 20 |
}
|
| 21 |
},
|
| 22 |
{
|
| 23 |
-
model: "
|
| 24 |
-
baseline:
|
| 25 |
methods: {
|
| 26 |
-
keyword_objective_combined:
|
| 27 |
}
|
| 28 |
},
|
| 29 |
{
|
| 30 |
-
model: "
|
| 31 |
-
baseline:
|
| 32 |
methods: {
|
| 33 |
-
keyword_objective_combined:
|
|
|
|
| 34 |
}
|
| 35 |
},
|
| 36 |
{
|
| 37 |
-
model: "
|
| 38 |
-
baseline:
|
| 39 |
methods: {
|
| 40 |
-
keyword_objective_combined:
|
| 41 |
}
|
| 42 |
},
|
| 43 |
{
|
| 44 |
-
model: "
|
| 45 |
-
baseline:
|
| 46 |
methods: {
|
| 47 |
-
keyword_objective_combined:
|
| 48 |
}
|
| 49 |
},
|
| 50 |
{
|
| 51 |
-
model: "
|
| 52 |
-
baseline:
|
| 53 |
methods: {
|
| 54 |
-
keyword_objective_combined:
|
| 55 |
}
|
| 56 |
},
|
| 57 |
{
|
| 58 |
-
model: "
|
| 59 |
-
baseline:
|
| 60 |
methods: {
|
| 61 |
-
keyword_objective_combined:
|
| 62 |
-
root_problem: 67.19
|
| 63 |
}
|
| 64 |
},
|
| 65 |
{
|
| 66 |
-
model: "
|
| 67 |
-
baseline:
|
| 68 |
methods: {
|
| 69 |
-
keyword_objective_combined:
|
| 70 |
}
|
| 71 |
},
|
| 72 |
{
|
| 73 |
-
model: "
|
| 74 |
-
baseline: 67
|
| 75 |
methods: {
|
| 76 |
-
keyword_objective_combined:
|
| 77 |
}
|
| 78 |
},
|
| 79 |
{
|
| 80 |
-
model: "
|
| 81 |
-
baseline:
|
| 82 |
methods: {
|
| 83 |
-
keyword_objective_combined:
|
|
|
|
| 84 |
}
|
| 85 |
},
|
| 86 |
{
|
| 87 |
-
model: "
|
| 88 |
-
baseline:
|
| 89 |
methods: {
|
| 90 |
-
keyword_objective_combined:
|
|
|
|
| 91 |
}
|
| 92 |
}
|
| 93 |
];
|
|
@@ -175,9 +175,9 @@ const BenchmarkChart = () => {
|
|
| 175 |
</div>
|
| 176 |
</div>
|
| 177 |
|
| 178 |
-
{/* Chart Container -
|
| 179 |
<div className="bg-white rounded-xl shadow-2xl p-4">
|
| 180 |
-
<div className="h-[
|
| 181 |
<div className="space-y-2">
|
| 182 |
{benchmarkData.map((modelData, index) => {
|
| 183 |
const currentValue = getCurrentValue(modelData, currentPhase);
|
|
@@ -274,7 +274,7 @@ const BenchmarkChart = () => {
|
|
| 274 |
{/* Footer */}
|
| 275 |
<div className="mt-4 text-center text-slate-400 space-y-1">
|
| 276 |
<p className="text-sm">
|
| 277 |
-
Sorted by
|
| 278 |
</p>
|
| 279 |
<p className="text-xs">
|
| 280 |
Bars extend from baseline to show transformation method impact
|
|
|
|
| 1 |
import React, { useState, useEffect } from 'react';
|
| 2 |
|
| 3 |
const BenchmarkChart = () => {
|
| 4 |
+
// Real data, sorted in descending order of each model's highest method ASR, not its baseline (largest bars at top)
|
| 5 |
const benchmarkData = [
|
| 6 |
{
|
| 7 |
+
model: "Grok 4",
|
| 8 |
+
baseline: 68.67,
|
| 9 |
methods: {
|
| 10 |
+
keyword_objective_combined: 85.15
|
|
|
|
| 11 |
}
|
| 12 |
},
|
| 13 |
{
|
| 14 |
+
model: "Deepseek R1-0528",
|
| 15 |
+
baseline: 68.67,
|
| 16 |
methods: {
|
| 17 |
+
keyword_objective_combined: 83.76
|
|
|
|
| 18 |
}
|
| 19 |
},
|
| 20 |
{
|
| 21 |
+
model: "Llama 3.1 405B",
|
| 22 |
+
baseline: 67.00,
|
| 23 |
methods: {
|
| 24 |
+
keyword_objective_combined: 80.75
|
| 25 |
}
|
| 26 |
},
|
| 27 |
{
|
| 28 |
+
model: "Gemini 2.5 Pro",
|
| 29 |
+
baseline: 55.67,
|
| 30 |
methods: {
|
| 31 |
+
keyword_objective_combined: 74.14,
|
| 32 |
+
root_problem: 67.19
|
| 33 |
}
|
| 34 |
},
|
| 35 |
{
|
| 36 |
+
model: "Llama 3 8B Instruct Reference",
|
| 37 |
+
baseline: 58.33,
|
| 38 |
methods: {
|
| 39 |
+
keyword_objective_combined: 68.86
|
| 40 |
}
|
| 41 |
},
|
| 42 |
{
|
| 43 |
+
model: "Mixtral 8x22B",
|
| 44 |
+
baseline: 48.00,
|
| 45 |
methods: {
|
| 46 |
+
keyword_objective_combined: 66.82
|
| 47 |
}
|
| 48 |
},
|
| 49 |
{
|
| 50 |
+
model: "Llama 4 Maverick Instruct",
|
| 51 |
+
baseline: 45.00,
|
| 52 |
methods: {
|
| 53 |
+
keyword_objective_combined: 56.46
|
| 54 |
}
|
| 55 |
},
|
| 56 |
{
|
| 57 |
+
model: "GPT o3",
|
| 58 |
+
baseline: 22.00,
|
| 59 |
methods: {
|
| 60 |
+
keyword_objective_combined: 30.53
|
|
|
|
| 61 |
}
|
| 62 |
},
|
| 63 |
{
|
| 64 |
+
model: "Claude 4 Sonnet",
|
| 65 |
+
baseline: 26.33,
|
| 66 |
methods: {
|
| 67 |
+
keyword_objective_combined: 28.64
|
| 68 |
}
|
| 69 |
},
|
| 70 |
{
|
| 71 |
+
model: "Claude Opus 4.1",
|
| 72 |
+
baseline: 20.67,
|
| 73 |
methods: {
|
| 74 |
+
keyword_objective_combined: 23.56
|
| 75 |
}
|
| 76 |
},
|
| 77 |
{
|
| 78 |
+
model: "GPT 5",
|
| 79 |
+
baseline: 8.33,
|
| 80 |
methods: {
|
| 81 |
+
keyword_objective_combined: 11.68,
|
| 82 |
+
root_problem: 12.46
|
| 83 |
}
|
| 84 |
},
|
| 85 |
{
|
| 86 |
+
model: "GPT 5 mini",
|
| 87 |
+
baseline: 7.67,
|
| 88 |
methods: {
|
| 89 |
+
keyword_objective_combined: 11.28,
|
| 90 |
+
root_problem: 10.44
|
| 91 |
}
|
| 92 |
}
|
| 93 |
];
|
|
|
|
| 175 |
</div>
|
| 176 |
</div>
|
| 177 |
|
| 178 |
+
{/* Chart Container - taller: fixed 700px height, scrolls vertically when rows overflow */}
|
| 179 |
<div className="bg-white rounded-xl shadow-2xl p-4">
|
| 180 |
+
<div className="h-[700px] overflow-y-auto pr-2">
|
| 181 |
<div className="space-y-2">
|
| 182 |
{benchmarkData.map((modelData, index) => {
|
| 183 |
const currentValue = getCurrentValue(modelData, currentPhase);
|
|
|
|
| 274 |
{/* Footer */}
|
| 275 |
<div className="mt-4 text-center text-slate-400 space-y-1">
|
| 276 |
<p className="text-sm">
|
| 277 |
+
Sorted by Maximum ASR (largest bars at top) • Click button above for manual control
|
| 278 |
</p>
|
| 279 |
<p className="text-xs">
|
| 280 |
Bars extend from baseline to show transformation method impact
|