debjitpaul committed on
Commit
939af28
·
1 Parent(s): 63fb47d

Remove redundant baseline JSON (now in PAPER_BASELINES)

Browse files
submissions/2026-04-10-anthropic-claude-opus-react.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "metadata": {
3
- "agent_name": "ReAct-Claude-Opus",
4
- "base_model": "claude-opus-4-6",
5
- "scaffold": "ReAct",
6
- "tools_used": ["web_search", "python_interpreter"],
7
- "organization": "DeepSynth authors (paper baseline)",
8
- "paper_url": "https://arxiv.org/abs/2602.21143",
9
- "code_url": "https://github.com/agentdeepsynthesis/deepsynth-bench",
10
- "submission_date": "2026-04-10",
11
- "split": "test",
12
- "num_seeds": 3
13
- },
14
- "scores": {
15
- "overall": {"exact_match": 0.22, "f1": 0.38, "llm_judge": 0.47},
16
- "per_domain": {
17
- "science": {"f1": 0.REPLACE},
18
- "geography": {"f1": 0.REPLACE},
19
- "economics": {"f1": 0.REPLACE},
20
- "history": {"f1": 0.REPLACE},
21
- "culture": {"f1": 0.REPLACE},
22
- "politics": {"f1": 0.REPLACE},
23
- "technology": {"f1": 0.REPLACE}
24
- }
25
- },
26
- "efficiency": {
27
- "avg_cost_usd": 0.44,
28
- "avg_latency_s": 15.2,
29
- "avg_num_tool_calls": 5.8
30
- }
31
- }