Spaces:
Running
Running
| { | |
| "perception_temporal_action_loc": { | |
| "MLAB (claude-3-5-sonnet-v2)": 0.8, | |
| "Top Human in Competition": 100.0, | |
| "MLAB (gemini-exp-1206)": -0.5, | |
| "MLAB (o3-mini)": 0.3, | |
| "MLAB (gpt-4o)": 0.3, | |
| "MLAB (llama3-1-405b-instruct)": 0.5, | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": 0.4, | |
| "Human Idea + MLAB (gpt-4o)": 0.5 | |
| }, | |
| "llm-merging": { | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": -1.0, | |
| "Top Human in Competition": 100.0, | |
| "MLAB (claude-3-5-sonnet-v2)": 5.0, | |
| "MLAB (gemini-exp-1206)": 5.0, | |
| "MLAB (o3-mini)": -1.0, | |
| "MLAB (gpt-4o)": 2.0, | |
| "MLAB (llama3-1-405b-instruct)": -1.0, | |
| "Human Idea + MLAB (gpt-4o)": -1.0 | |
| }, | |
| "product-recommendation": { | |
| "MLAB (claude-3-5-sonnet-v2)": 3.0, | |
| "Top Human in Competition": 100.0, | |
| "MLAB (gemini-exp-1206)": 0.1, | |
| "MLAB (o3-mini)": 0.1, | |
| "MLAB (gpt-4o)": 0.6, | |
| "MLAB (llama3-1-405b-instruct)": -0.0, | |
| "Human Idea + MLAB (gpt-4o)": 2.2, | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": 0.1 | |
| }, | |
| "weather_forcast": { | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": 39.4, | |
| "Top Human in Competition": 100.0, | |
| "Human Idea + MLAB (gpt-4o)": 12.3, | |
| "MLAB (claude-3-5-sonnet-v2)": 14.6, | |
| "MLAB (gemini-exp-1206)": 43.1, | |
| "MLAB (o3-mini)": 25.1, | |
| "MLAB (gpt-4o)": 47.5, | |
| "MLAB (llama3-1-405b-instruct)": 31.5 | |
| }, | |
| "meta-learning": { | |
| "MLAB (claude-3-5-sonnet-v2)": -4.9, | |
| "Top Human in Competition": 100.0, | |
| "MLAB (gemini-exp-1206)": -1.1, | |
| "MLAB (o3-mini)": -4.9, | |
| "MLAB (gpt-4o)": -4.9, | |
| "MLAB (llama3-1-405b-instruct)": -4.9, | |
| "Human Idea + MLAB (gpt-4o)": -4.9, | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": -4.9 | |
| }, | |
| "machine_unlearning": { | |
| "Human Idea + MLAB (gpt-4o)": 6.8, | |
| "Top Human in Competition": 100.0, | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": 11.8, | |
| "MLAB (claude-3-5-sonnet-v2)": -94.7, | |
| "MLAB (gemini-exp-1206)": 5.6, | |
| "MLAB (o3-mini)": 3.6, | |
| "MLAB (gpt-4o)": -18.0, | |
| "MLAB (llama3-1-405b-instruct)": 6.2 | |
| }, | |
| "backdoor-trigger-recovery": { | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": 4.0, | |
| "Top Human in Competition": 100.0, | |
| "MLAB (claude-3-5-sonnet-v2)": 39.9, | |
| "MLAB (gemini-exp-1206)": 12.9, | |
| "MLAB (o3-mini)": 6.2, | |
| "MLAB (gpt-4o)": 10.4, | |
| "MLAB (llama3-1-405b-instruct)": 11.5, | |
| "Human Idea + MLAB (gpt-4o)": 8.8 | |
| } | |
| } |