File size: 1,635 Bytes
36bbbc9
35d7d47
36bbbc9
 
 
 
35d7d47
 
 
36bbbc9
 
 
 
35d7d47
36bbbc9
a9b5126
35d7d47
36bbbc9
35d7d47
 
36bbbc9
35d7d47
36bbbc9
a9b5126
36bbbc9
 
 
 
 
a9b5126
35d7d47
36bbbc9
35d7d47
 
36bbbc9
35d7d47
36bbbc9
a9b5126
36bbbc9
 
 
 
35d7d47
 
 
 
36bbbc9
 
 
35d7d47
36bbbc9
 
 
a9b5126
35d7d47
 
 
36bbbc9
a9b5126
35d7d47
36bbbc9
 
35d7d47
 
 
 
36bbbc9
35d7d47
a9b5126
 
 
36bbbc9
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
{
  "run_id": "20260519-162229",
  "model": "qwen/qwen3.6-flash",
  "truncated": false,
  "resumed": 0,
  "cost": {
    "calls": 41,
    "prompt_tokens": 332060,
    "completion_tokens": 43651,
    "usd": 0.0,
    "max_usd": 0.0
  },
  "summary": {
    "action-sequenced-execution:hard": {
      "n": 1,
      "win_rate": 0.0,
      "composite_mean": 0.1773,
      "composite_std": 0.0,
      "perception_mean": 0.6844,
      "reasoning_mean": 0.6737,
      "action_mean": 1.0,
      "objective_mean": 0.375,
      "weakest_link_hist": {
        "reasoning": 1
      }
    }
  },
  "overall": {
    "n": 1,
    "win_rate": 0.0,
    "composite_mean": 0.1773,
    "composite_std": 0.0,
    "perception_mean": 0.6844,
    "reasoning_mean": 0.6737,
    "action_mean": 1.0,
    "objective_mean": 0.375,
    "weakest_link_hist": {
      "reasoning": 1
    }
  },
  "reward_vector_mean": {
    "economy": 0.5,
    "military": 0.0,
    "territory": 0.5491,
    "scouting": 0.6,
    "objective": 0.375
  },
  "episodes": [
    {
      "cell": "action-sequenced-execution:hard",
      "capability": "action",
      "split": "public",
      "seed": 1,
      "outcome": "loss",
      "composite": 0.1773,
      "perception": 0.6844,
      "reasoning": 0.6737,
      "action": 1.0,
      "weakest_link": "reasoning",
      "objective_progress": 0.375,
      "reward_vector": {
        "economy": 0.5,
        "military": 0.0,
        "territory": 0.5491,
        "scouting": 0.6,
        "objective": 0.375
      },
      "turns": 41,
      "notes": [
        "objective not met (loss); weakest link: reasoning"
      ]
    }
  ],
  "skipped": []
}