Spaces:

GoJulyAI
/

benchmark-enhancements

Running

App Files Community

yoshinakachi committed on Aug 25

Commit

493ead8

verified ·

1 Parent(s): 5b4f4bd

Update src/App.js

Browse files

Files changed (1) hide show

src/App.js +43 -43

src/App.js CHANGED Viewed

@@ -1,93 +1,93 @@
 import React, { useState, useEffect } from 'react';
 const BenchmarkChart = () => {
-  // Real data sorted by Human Baseline (ascending)
   const benchmarkData = [
     {
-      model: "GPT 5 mini",
-      baseline: 7.67,
       methods: {
-        keyword_objective_combined: 11.28,
-        root_problem: 10.44
       }
     },
     {
-      model: "GPT 5",
-      baseline: 8.33,
       methods: {
-        keyword_objective_combined: 11.68,
-        root_problem: 12.46
       }
     },
     {
-      model: "Claude Opus 4.1",
-      baseline: 20.67,
       methods: {
-        keyword_objective_combined: 23.56
       }
     },
     {
-      model: "GPT o3",
-      baseline: 22.00,
       methods: {
-        keyword_objective_combined: 30.53
       }
     },
     {
-      model: "Claude 4 Sonnet",
-      baseline: 26.33,
       methods: {
-        keyword_objective_combined: 28.64
       }
     },
     {
-      model: "Llama 4 Maverick Instruct",
-      baseline: 45.00,
       methods: {
-        keyword_objective_combined: 56.46
       }
     },
     {
-      model: "Mixtral 8x22B",
-      baseline: 48.00,
       methods: {
-        keyword_objective_combined: 66.82
       }
     },
     {
-      model: "Gemini 2.5 Pro",
-      baseline: 55.67,
       methods: {
-        keyword_objective_combined: 74.14,
-        root_problem: 67.19
       }
     },
     {
-      model: "Llama 3 8B Instruct Reference",
-      baseline: 58.33,
       methods: {
-        keyword_objective_combined: 68.86
       }
     },
     {
-      model: "Llama 3.1 405B",
-      baseline: 67.00,
       methods: {
-        keyword_objective_combined: 80.75
       }
     },
     {
-      model: "Deepseek R1-0528",
-      baseline: 68.67,
       methods: {
-        keyword_objective_combined: 83.76
       }
     },
     {
-      model: "Grok 4",
-      baseline: 68.67,
       methods: {
-        keyword_objective_combined: 85.15
       }
     }
   ];
@@ -175,9 +175,9 @@ const BenchmarkChart = () => {
           </div>
         </div>
-        {/* Chart Container - Taller */}
         <div className="bg-white rounded-xl shadow-2xl p-4">
-          <div className="h-[600px] overflow-y-auto pr-2">
             <div className="space-y-2">
               {benchmarkData.map((modelData, index) => {
                 const currentValue = getCurrentValue(modelData, currentPhase);
@@ -274,7 +274,7 @@ const BenchmarkChart = () => {
         {/* Footer */}
         <div className="mt-4 text-center text-slate-400 space-y-1">
           <p className="text-sm">
-            Sorted by Human Baseline ASR • Click button above for manual control
           </p>
           <p className="text-xs">
             Bars extend from baseline to show transformation method impact

 import React, { useState, useEffect } from 'react';
 const BenchmarkChart = () => {
+  // Real data sorted by highest achievable ASR (largest bars at top)
   const benchmarkData = [
     {
+      model: "Grok 4",
+      baseline: 68.67,
       methods: {
+        keyword_objective_combined: 85.15
       }
     },
     {
+      model: "Deepseek R1-0528",
+      baseline: 68.67,
       methods: {
+        keyword_objective_combined: 83.76
       }
     },
     {
+      model: "Llama 3.1 405B",
+      baseline: 67.00,
       methods: {
+        keyword_objective_combined: 80.75
       }
     },
     {
+      model: "Gemini 2.5 Pro",
+      baseline: 55.67,
       methods: {
+        keyword_objective_combined: 74.14,
+        root_problem: 67.19
       }
     },
     {
+      model: "Llama 3 8B Instruct Reference",
+      baseline: 58.33,
       methods: {
+        keyword_objective_combined: 68.86
       }
     },
     {
+      model: "Mixtral 8x22B",
+      baseline: 48.00,
       methods: {
+        keyword_objective_combined: 66.82
       }
     },
     {
+      model: "Llama 4 Maverick Instruct",
+      baseline: 45.00,
       methods: {
+        keyword_objective_combined: 56.46
       }
     },
     {
+      model: "GPT o3",
+      baseline: 22.00,
       methods: {
+        keyword_objective_combined: 30.53
       }
     },
     {
+      model: "Claude 4 Sonnet",
+      baseline: 26.33,
       methods: {
+        keyword_objective_combined: 28.64
       }
     },
     {
+      model: "Claude Opus 4.1",
+      baseline: 20.67,
       methods: {
+        keyword_objective_combined: 23.56
       }
     },
     {
+      model: "GPT 5",
+      baseline: 8.33,
       methods: {
+        keyword_objective_combined: 11.68,
+        root_problem: 12.46
       }
     },
     {
+      model: "GPT 5 mini",
+      baseline: 7.67,
       methods: {
+        keyword_objective_combined: 11.28,
+        root_problem: 10.44
       }
     }
   ];
           </div>
         </div>
+        {/* Chart Container - Longer */}
         <div className="bg-white rounded-xl shadow-2xl p-4">
+          <div className="h-[700px] overflow-y-auto pr-2">
             <div className="space-y-2">
               {benchmarkData.map((modelData, index) => {
                 const currentValue = getCurrentValue(modelData, currentPhase);
         {/* Footer */}
         <div className="mt-4 text-center text-slate-400 space-y-1">
           <p className="text-sm">
+            Sorted by Maximum ASR (largest bars at top) • Click button above for manual control
           </p>
           <p className="text-xs">
             Bars extend from baseline to show transformation method impact