{
"perception_temporal_action_loc": {
"MLAB (claude-3-5-sonnet-v2)": 0.7810185077440877,
"MLAB (gemini-exp-1206)": -0.4731328246392113,
"MLAB (o3-mini)": 0.3066106841553126,
"MLAB (gpt-4o)": 0.3298075630252947,
"MLAB (llama3-1-405b-instruct)": 0.5183240203504569,
"CoI-Agent (o1) + MLAB (gpt-4o)": 0.3475212791527979
},
"llm-merging": {
"CoI-Agent (o1) + MLAB (gpt-4o)": -0.9900989999019761,
"MLAB (claude-3-5-sonnet-v2)": 4.950495058915793,
"MLAB (gemini-exp-1206)": 4.950495058915793,
"MLAB (o3-mini)": -0.9900989999019761,
"MLAB (gpt-4o)": 1.9801980295069084,
"MLAB (llama3-1-405b-instruct)": -0.9900989999019761
},
"meta-learning": {
"CoI-Agent (o1) + MLAB (gpt-4o)": 1.781401026144938,
"MLAB (claude-3-5-sonnet-v2)": 1.781401026144938,
"MLAB (gemini-exp-1206)": 1.781401026144938,
"MLAB (o3-mini)": -4.900331256476853,
"MLAB (gpt-4o)": 1.781401026144938,
"MLAB (llama3-1-405b-instruct)": 1.781401026144938
},
"product-recommendation": {
"CoI-Agent (o1) + MLAB (gpt-4o)": 0.1459345029718814,
"MLAB (claude-3-5-sonnet-v2)": 2.9771372473170388,
"MLAB (gemini-exp-1206)": 0.1459345029718814,
"MLAB (o3-mini)": 0.1462759705510577,
"MLAB (gpt-4o)": 0.6398666846799662,
"MLAB (llama3-1-405b-instruct)": -7.044800459739471e-10
},
"machine_unlearning": {
"CoI-Agent (o1) + MLAB (gpt-4o)": 11.832138969791846,
"MLAB (claude-3-5-sonnet-v2)": -94.71778374121965,
"MLAB (gemini-exp-1206)": 5.632371576335568,
"MLAB (o3-mini)": 3.623856546073656,
"MLAB (gpt-4o)": -17.996962489965668,
"MLAB (llama3-1-405b-instruct)": 6.2098517833311
},
"backdoor-trigger-recovery": {
"CoI-Agent (o1) + MLAB (gpt-4o)": 6.1572772457753295,
"MLAB (claude-3-5-sonnet-v2)": 39.903815022493674,
"MLAB (gemini-exp-1206)": 12.94287662739089,
"MLAB (o3-mini)": 6.238823700218141,
"MLAB (gpt-4o)": 10.386627431983776,
"MLAB (llama3-1-405b-instruct)": 11.542228789066877
}
}