Spaces:
Running
Running
File size: 2,286 Bytes
ed2eb44 cf2253a 06d4ee9 cf2253a 678bdbb ed2eb44 cf2253a 06d4ee9 cf2253a 678bdbb ed2eb44 cf2253a 678bdbb cf2253a 678bdbb cf2253a 678bdbb cf2253a 678bdbb ed2eb44 678bdbb cf2253a 678bdbb cf2253a ed2eb44 678bdbb a325fdc cf2253a a325fdc 678bdbb ed2eb44 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | {
"perception_temporal_action_loc": {
"MLAB (claude-3-5-sonnet-v2)": 0.8,
"Top Human in Competition": 100.0,
"MLAB (gemini-exp-1206)": -0.5,
"MLAB (o3-mini)": 0.3,
"MLAB (gpt-4o)": 0.3,
"MLAB (llama3-1-405b-instruct)": 0.5,
"CoI-Agent (o1) + MLAB (gpt-4o)": 0.4,
"Human Idea + MLAB (gpt-4o)": 0.5
},
"llm-merging": {
"CoI-Agent (o1) + MLAB (gpt-4o)": -1.0,
"Top Human in Competition": 100.0,
"MLAB (claude-3-5-sonnet-v2)": 5.0,
"MLAB (gemini-exp-1206)": 5.0,
"MLAB (o3-mini)": -1.0,
"MLAB (gpt-4o)": 2.0,
"MLAB (llama3-1-405b-instruct)": -1.0,
"Human Idea + MLAB (gpt-4o)": -1.0
},
"product-recommendation": {
"MLAB (claude-3-5-sonnet-v2)": 3.0,
"Top Human in Competition": 100.0,
"MLAB (gemini-exp-1206)": 0.1,
"MLAB (o3-mini)": 0.1,
"MLAB (gpt-4o)": 0.6,
"MLAB (llama3-1-405b-instruct)": -0.0,
"Human Idea + MLAB (gpt-4o)": 2.2,
"CoI-Agent (o1) + MLAB (gpt-4o)": 0.1
},
"weather_forcast": {
"CoI-Agent (o1) + MLAB (gpt-4o)": 39.4,
"Top Human in Competition": 100.0,
"Human Idea + MLAB (gpt-4o)": 12.3,
"MLAB (claude-3-5-sonnet-v2)": 14.6,
"MLAB (gemini-exp-1206)": 43.1,
"MLAB (o3-mini)": 25.1,
"MLAB (gpt-4o)": 47.5,
"MLAB (llama3-1-405b-instruct)": 31.5
},
"meta-learning": {
"MLAB (claude-3-5-sonnet-v2)": -4.9,
"Top Human in Competition": 100.0,
"MLAB (gemini-exp-1206)": -1.1,
"MLAB (o3-mini)": -4.9,
"MLAB (gpt-4o)": -4.9,
"MLAB (llama3-1-405b-instruct)": -4.9,
"Human Idea + MLAB (gpt-4o)": -4.9,
"CoI-Agent (o1) + MLAB (gpt-4o)": -4.9
},
"machine_unlearning": {
"Human Idea + MLAB (gpt-4o)": 6.8,
"Top Human in Competition": 100.0,
"CoI-Agent (o1) + MLAB (gpt-4o)": 11.8,
"MLAB (claude-3-5-sonnet-v2)": -94.7,
"MLAB (gemini-exp-1206)": 5.6,
"MLAB (o3-mini)": 3.6,
"MLAB (gpt-4o)": -18.0,
"MLAB (llama3-1-405b-instruct)": 6.2
},
"backdoor-trigger-recovery": {
"CoI-Agent (o1) + MLAB (gpt-4o)": 4.0,
"Top Human in Competition": 100.0,
"MLAB (claude-3-5-sonnet-v2)": 39.9,
"MLAB (gemini-exp-1206)": 12.9,
"MLAB (o3-mini)": 6.2,
"MLAB (gpt-4o)": 10.4,
"MLAB (llama3-1-405b-instruct)": 11.5,
"Human Idea + MLAB (gpt-4o)": 8.8
}
} |