openhands-index/data/1.0.0-dev1/agenteval.json
openhands
Initial OpenHands Index leaderboard based on ASTA Bench
085a012
raw
history blame
2.51 kB
{
  "suite_config": {
    "name": "openhands-index",
    "version": "1.0.0-dev1",
    "splits": [
      {
        "name": "validation",
        "tasks": [
          {
            "name": "swe-bench",
            "path": "openhands/swe-bench",
            "primary_metric": "resolved/mean",
            "tags": ["swe-bench"]
          },
          {
            "name": "multi-swe-bench",
            "path": "openhands/multi-swe-bench",
            "primary_metric": "resolved/mean",
            "tags": ["multi-swe-bench"]
          },
          {
            "name": "swe-bench-multimodal",
            "path": "openhands/swe-bench-multimodal",
            "primary_metric": "resolved/mean",
            "tags": ["swe-bench-multimodal"]
          },
          {
            "name": "swt-bench",
            "path": "openhands/swt-bench",
            "primary_metric": "generated/mean",
            "tags": ["swt-bench"]
          },
          {
            "name": "commit0",
            "path": "openhands/commit0",
            "primary_metric": "tests_passed/mean",
            "tags": ["commit0"]
          },
          {
            "name": "gaia",
            "path": "openhands/gaia",
            "primary_metric": "correct/mean",
            "tags": ["gaia"]
          }
        ]
      },
      {
        "name": "test",
        "tasks": [
          {
            "name": "swe-bench",
            "path": "openhands/swe-bench",
            "primary_metric": "resolved/mean",
            "tags": ["swe-bench"]
          },
          {
            "name": "multi-swe-bench",
            "path": "openhands/multi-swe-bench",
            "primary_metric": "resolved/mean",
            "tags": ["multi-swe-bench"]
          },
          {
            "name": "swe-bench-multimodal",
            "path": "openhands/swe-bench-multimodal",
            "primary_metric": "resolved/mean",
            "tags": ["swe-bench-multimodal"]
          },
          {
            "name": "swt-bench",
            "path": "openhands/swt-bench",
            "primary_metric": "generated/mean",
            "tags": ["swt-bench"]
          },
          {
            "name": "commit0",
            "path": "openhands/commit0",
            "primary_metric": "tests_passed/mean",
            "tags": ["commit0"]
          },
          {
            "name": "gaia",
            "path": "openhands/gaia",
            "primary_metric": "correct/mean",
            "tags": ["gaia"]
          }
        ]
      }
    ]
  }
}