Spaces:
Running
Running
File size: 2,295 Bytes
06d4ee9 cf2253a 678bdbb 06d4ee9 cf2253a 678bdbb 06d4ee9 cf2253a 678bdbb cf2253a 678bdbb cf2253a 678bdbb cf2253a 06d4ee9 678bdbb 06d4ee9 678bdbb cf2253a 678bdbb cf2253a 06d4ee9 678bdbb a325fdc cf2253a a325fdc 678bdbb 06d4ee9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
{
"perception_temporal_action_loc": {
"MLAB (claude-3-5-sonnet-v2)": 2.2,
"Top Human in Competition": 284.6,
"MLAB (gemini-exp-1206)": -1.3,
"MLAB (o3-mini)": 0.9,
"MLAB (gpt-4o)": 0.9,
"MLAB (llama3-1-405b-instruct)": 1.5,
"CoI-Agent (o1) + MLAB (gpt-4o)": 1.0,
"Human Idea + MLAB (gpt-4o)": 1.5
},
"llm-merging": {
"CoI-Agent (o1) + MLAB (gpt-4o)": -0.7,
"Top Human in Competition": 68.2,
"MLAB (claude-3-5-sonnet-v2)": 3.4,
"MLAB (gemini-exp-1206)": 3.4,
"MLAB (o3-mini)": -0.7,
"MLAB (gpt-4o)": 1.4,
"MLAB (llama3-1-405b-instruct)": -0.7,
"Human Idea + MLAB (gpt-4o)": -0.7
},
"product-recommendation": {
"MLAB (claude-3-5-sonnet-v2)": 12.3,
"Top Human in Competition": 412.6,
"MLAB (gemini-exp-1206)": 0.6,
"MLAB (o3-mini)": 0.6,
"MLAB (gpt-4o)": 2.6,
"MLAB (llama3-1-405b-instruct)": -0.0,
"Human Idea + MLAB (gpt-4o)": 8.9,
"CoI-Agent (o1) + MLAB (gpt-4o)": 0.6
},
"weather_forcast": {
"CoI-Agent (o1) + MLAB (gpt-4o)": 83.6,
"Top Human in Competition": 212.0,
"Human Idea + MLAB (gpt-4o)": 26.1,
"MLAB (claude-3-5-sonnet-v2)": 31.0,
"MLAB (gemini-exp-1206)": 91.4,
"MLAB (o3-mini)": 53.3,
"MLAB (gpt-4o)": 100.8,
"MLAB (llama3-1-405b-instruct)": 66.7
},
"meta-learning": {
"MLAB (claude-3-5-sonnet-v2)": -14.9,
"Top Human in Competition": 304.5,
"MLAB (gemini-exp-1206)": -3.2,
"MLAB (o3-mini)": -14.9,
"MLAB (gpt-4o)": -14.9,
"MLAB (llama3-1-405b-instruct)": -14.9,
"Human Idea + MLAB (gpt-4o)": -14.9,
"CoI-Agent (o1) + MLAB (gpt-4o)": -14.9
},
"machine_unlearning": {
"Human Idea + MLAB (gpt-4o)": 4.2,
"Top Human in Competition": 61.9,
"CoI-Agent (o1) + MLAB (gpt-4o)": 7.3,
"MLAB (claude-3-5-sonnet-v2)": -58.6,
"MLAB (gemini-exp-1206)": 3.5,
"MLAB (o3-mini)": 2.2,
"MLAB (gpt-4o)": -11.1,
"MLAB (llama3-1-405b-instruct)": 3.8
},
"backdoor-trigger-recovery": {
"CoI-Agent (o1) + MLAB (gpt-4o)": 24.9,
"Top Human in Competition": 621.3,
"MLAB (claude-3-5-sonnet-v2)": 247.9,
"MLAB (gemini-exp-1206)": 80.4,
"MLAB (o3-mini)": 38.8,
"MLAB (gpt-4o)": 64.5,
"MLAB (llama3-1-405b-instruct)": 71.7,
"Human Idea + MLAB (gpt-4o)": 54.5
}
} |