debjitpaul committed on
Commit ·
939af28
1
Parent(s): 63fb47d
Remove redundant baseline JSON (now in PAPER_BASELINES)
Browse files
submissions/2026-04-10-anthropic-claude-opus-react.json
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"metadata": {
|
| 3 |
-
"agent_name": "ReAct-Claude-Opus",
|
| 4 |
-
"base_model": "claude-opus-4-6",
|
| 5 |
-
"scaffold": "ReAct",
|
| 6 |
-
"tools_used": ["web_search", "python_interpreter"],
|
| 7 |
-
"organization": "DeepSynth authors (paper baseline)",
|
| 8 |
-
"paper_url": "https://arxiv.org/abs/2602.21143",
|
| 9 |
-
"code_url": "https://github.com/agentdeepsynthesis/deepsynth-bench",
|
| 10 |
-
"submission_date": "2026-04-10",
|
| 11 |
-
"split": "test",
|
| 12 |
-
"num_seeds": 3
|
| 13 |
-
},
|
| 14 |
-
"scores": {
|
| 15 |
-
"overall": {"exact_match": 0.22, "f1": 0.38, "llm_judge": 0.47},
|
| 16 |
-
"per_domain": {
|
| 17 |
-
"science": {"f1": 0.REPLACE},
|
| 18 |
-
"geography": {"f1": 0.REPLACE},
|
| 19 |
-
"economics": {"f1": 0.REPLACE},
|
| 20 |
-
"history": {"f1": 0.REPLACE},
|
| 21 |
-
"culture": {"f1": 0.REPLACE},
|
| 22 |
-
"politics": {"f1": 0.REPLACE},
|
| 23 |
-
"technology": {"f1": 0.REPLACE}
|
| 24 |
-
}
|
| 25 |
-
},
|
| 26 |
-
"efficiency": {
|
| 27 |
-
"avg_cost_usd": 0.44,
|
| 28 |
-
"avg_latency_s": 15.2,
|
| 29 |
-
"avg_num_tool_calls": 5.8
|
| 30 |
-
}
|
| 31 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|