File size: 2,295 Bytes
06d4ee9
 
cf2253a
 
 
 
 
 
678bdbb
 
06d4ee9
 
cf2253a
 
 
 
 
 
678bdbb
 
06d4ee9
 
cf2253a
678bdbb
cf2253a
 
 
678bdbb
 
 
cf2253a
 
 
678bdbb
 
cf2253a
 
 
 
 
06d4ee9
678bdbb
 
 
 
 
 
 
 
 
 
06d4ee9
678bdbb
cf2253a
678bdbb
cf2253a
 
 
 
 
 
06d4ee9
678bdbb
a325fdc
cf2253a
 
 
a325fdc
678bdbb
 
06d4ee9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
{
  "perception_temporal_action_loc": {
    "MLAB (claude-3-5-sonnet-v2)": 2.2,
    "Top Human in Competition": 284.6,
    "MLAB (gemini-exp-1206)": -1.3,
    "MLAB (o3-mini)": 0.9,
    "MLAB (gpt-4o)": 0.9,
    "MLAB (llama3-1-405b-instruct)": 1.5,
    "CoI-Agent (o1) + MLAB (gpt-4o)": 1.0,
    "Human Idea + MLAB (gpt-4o)": 1.5
  },
  "llm-merging": {
    "CoI-Agent (o1) + MLAB (gpt-4o)": -0.7,
    "Top Human in Competition": 68.2,
    "MLAB (claude-3-5-sonnet-v2)": 3.4,
    "MLAB (gemini-exp-1206)": 3.4,
    "MLAB (o3-mini)": -0.7,
    "MLAB (gpt-4o)": 1.4,
    "MLAB (llama3-1-405b-instruct)": -0.7,
    "Human Idea + MLAB (gpt-4o)": -0.7
  },
  "product-recommendation": {
    "MLAB (claude-3-5-sonnet-v2)": 12.3,
    "Top Human in Competition": 412.6,
    "MLAB (gemini-exp-1206)": 0.6,
    "MLAB (o3-mini)": 0.6,
    "MLAB (gpt-4o)": 2.6,
    "MLAB (llama3-1-405b-instruct)": -0.0,
    "Human Idea + MLAB (gpt-4o)": 8.9,
    "CoI-Agent (o1) + MLAB (gpt-4o)": 0.6
  },
  "weather_forcast": {
    "CoI-Agent (o1) + MLAB (gpt-4o)": 83.6,
    "Top Human in Competition": 212.0,
    "Human Idea + MLAB (gpt-4o)": 26.1,
    "MLAB (claude-3-5-sonnet-v2)": 31.0,
    "MLAB (gemini-exp-1206)": 91.4,
    "MLAB (o3-mini)": 53.3,
    "MLAB (gpt-4o)": 100.8,
    "MLAB (llama3-1-405b-instruct)": 66.7
  },
  "meta-learning": {
    "MLAB (claude-3-5-sonnet-v2)": -14.9,
    "Top Human in Competition": 304.5,
    "MLAB (gemini-exp-1206)": -3.2,
    "MLAB (o3-mini)": -14.9,
    "MLAB (gpt-4o)": -14.9,
    "MLAB (llama3-1-405b-instruct)": -14.9,
    "Human Idea + MLAB (gpt-4o)": -14.9,
    "CoI-Agent (o1) + MLAB (gpt-4o)": -14.9
  },
  "machine_unlearning": {
    "Human Idea + MLAB (gpt-4o)": 4.2,
    "Top Human in Competition": 61.9,
    "CoI-Agent (o1) + MLAB (gpt-4o)": 7.3,
    "MLAB (claude-3-5-sonnet-v2)": -58.6,
    "MLAB (gemini-exp-1206)": 3.5,
    "MLAB (o3-mini)": 2.2,
    "MLAB (gpt-4o)": -11.1,
    "MLAB (llama3-1-405b-instruct)": 3.8
  },
  "backdoor-trigger-recovery": {
    "CoI-Agent (o1) + MLAB (gpt-4o)": 24.9,
    "Top Human in Competition": 621.3,
    "MLAB (claude-3-5-sonnet-v2)": 247.9,
    "MLAB (gemini-exp-1206)": 80.4,
    "MLAB (o3-mini)": 38.8,
    "MLAB (gpt-4o)": 64.5,
    "MLAB (llama3-1-405b-instruct)": 71.7,
    "Human Idea + MLAB (gpt-4o)": 54.5
  }
}