Spaces:
Running
Running
File size: 1,635 Bytes
36bbbc9 35d7d47 36bbbc9 35d7d47 36bbbc9 35d7d47 36bbbc9 a9b5126 35d7d47 36bbbc9 35d7d47 36bbbc9 35d7d47 36bbbc9 a9b5126 36bbbc9 a9b5126 35d7d47 36bbbc9 35d7d47 36bbbc9 35d7d47 36bbbc9 a9b5126 36bbbc9 35d7d47 36bbbc9 35d7d47 36bbbc9 a9b5126 35d7d47 36bbbc9 a9b5126 35d7d47 36bbbc9 35d7d47 36bbbc9 35d7d47 a9b5126 36bbbc9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 | {
"run_id": "20260519-162229",
"model": "qwen/qwen3.6-flash",
"truncated": false,
"resumed": 0,
"cost": {
"calls": 41,
"prompt_tokens": 332060,
"completion_tokens": 43651,
"usd": 0.0,
"max_usd": 0.0
},
"summary": {
"action-sequenced-execution:hard": {
"n": 1,
"win_rate": 0.0,
"composite_mean": 0.1773,
"composite_std": 0.0,
"perception_mean": 0.6844,
"reasoning_mean": 0.6737,
"action_mean": 1.0,
"objective_mean": 0.375,
"weakest_link_hist": {
"reasoning": 1
}
}
},
"overall": {
"n": 1,
"win_rate": 0.0,
"composite_mean": 0.1773,
"composite_std": 0.0,
"perception_mean": 0.6844,
"reasoning_mean": 0.6737,
"action_mean": 1.0,
"objective_mean": 0.375,
"weakest_link_hist": {
"reasoning": 1
}
},
"reward_vector_mean": {
"economy": 0.5,
"military": 0.0,
"territory": 0.5491,
"scouting": 0.6,
"objective": 0.375
},
"episodes": [
{
"cell": "action-sequenced-execution:hard",
"capability": "action",
"split": "public",
"seed": 1,
"outcome": "loss",
"composite": 0.1773,
"perception": 0.6844,
"reasoning": 0.6737,
"action": 1.0,
"weakest_link": "reasoning",
"objective_progress": 0.375,
"reward_vector": {
"economy": 0.5,
"military": 0.0,
"territory": 0.5491,
"scouting": 0.6,
"objective": 0.375
},
"turns": 41,
"notes": [
"objective not met (loss); weakest link: reasoning"
]
}
],
"skipped": []
} |