Spaces:
Sleeping
Sleeping
| { | |
| "schema": "autodatalab-plus.headline_benchmark.v2", | |
| "config": { | |
| "tasks": [ | |
| "hard_brief", | |
| "expert_brief", | |
| "crisis_brief" | |
| ], | |
| "policies": [ | |
| "base_naive", | |
| "base_roundrobin", | |
| "trained_mlp", | |
| "oracle_router" | |
| ], | |
| "policy_info": { | |
| "base_naive": "untrained baseline (analyst-only)", | |
| "base_roundrobin": "untrained baseline (fixed order)", | |
| "trained_mlp": "trained MLP CoS (REINFORCE, 600 ep) - cos_final.pt (ok)", | |
| "oracle_router": "oracle router (upper bound, handcoded canonical sequence)" | |
| }, | |
| "seeds": [ | |
| 11, | |
| 23, | |
| 47, | |
| 91, | |
| 137 | |
| ], | |
| "rag_settings": [ | |
| false, | |
| true | |
| ], | |
| "auto_fill_required": false, | |
| "shaping": "strict" | |
| }, | |
| "cells": [ | |
| { | |
| "task": "hard_brief", | |
| "policy": "base_naive", | |
| "use_rag": false, | |
| "terminal": { | |
| "mean": 0.2646, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 0.1346, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "hard_brief", | |
| "policy": "base_roundrobin", | |
| "use_rag": false, | |
| "terminal": { | |
| "mean": 0.8827, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 1.2627, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2627, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2627, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2627, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2627, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2627, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "hard_brief", | |
| "policy": "trained_mlp", | |
| "use_rag": false, | |
| "terminal": { | |
| "mean": 0.7286, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": -0.7314, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.7286, | |
| "cumulative": -0.7314, | |
| "steps": 12, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.7286, | |
| "cumulative": -0.7314, | |
| "steps": 12, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.7286, | |
| "cumulative": -0.7314, | |
| "steps": 12, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.7286, | |
| "cumulative": -0.7314, | |
| "steps": 12, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.7286, | |
| "cumulative": -0.7314, | |
| "steps": 12, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "hard_brief", | |
| "policy": "oracle_router", | |
| "use_rag": false, | |
| "terminal": { | |
| "mean": 0.8827, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 1.2627, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2627, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2627, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2627, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2627, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2627, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "hard_brief", | |
| "policy": "base_naive", | |
| "use_rag": true, | |
| "terminal": { | |
| "mean": 0.3217, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 0.1917, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "hard_brief", | |
| "policy": "base_roundrobin", | |
| "use_rag": true, | |
| "terminal": { | |
| "mean": 0.897, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 1.277, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.897, | |
| "cumulative": 1.277, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.897, | |
| "cumulative": 1.277, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.897, | |
| "cumulative": 1.277, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.897, | |
| "cumulative": 1.277, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.897, | |
| "cumulative": 1.277, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "hard_brief", | |
| "policy": "trained_mlp", | |
| "use_rag": true, | |
| "terminal": { | |
| "mean": 0.7429, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": -0.7171, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.7429, | |
| "cumulative": -0.7171, | |
| "steps": 12, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.7429, | |
| "cumulative": -0.7171, | |
| "steps": 12, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.7429, | |
| "cumulative": -0.7171, | |
| "steps": 12, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.7429, | |
| "cumulative": -0.7171, | |
| "steps": 12, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.7429, | |
| "cumulative": -0.7171, | |
| "steps": 12, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "hard_brief", | |
| "policy": "oracle_router", | |
| "use_rag": true, | |
| "terminal": { | |
| "mean": 0.897, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 1.277, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.897, | |
| "cumulative": 1.277, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.897, | |
| "cumulative": 1.277, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.897, | |
| "cumulative": 1.277, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.897, | |
| "cumulative": 1.277, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.897, | |
| "cumulative": 1.277, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "expert_brief", | |
| "policy": "base_naive", | |
| "use_rag": false, | |
| "terminal": { | |
| "mean": 0.2646, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 0.1346, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "expert_brief", | |
| "policy": "base_roundrobin", | |
| "use_rag": false, | |
| "terminal": { | |
| "mean": 0.8827, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 1.2827, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2827, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2827, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2827, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2827, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2827, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "expert_brief", | |
| "policy": "trained_mlp", | |
| "use_rag": false, | |
| "terminal": { | |
| "mean": 0.7286, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": -1.1314, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.7286, | |
| "cumulative": -1.1314, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.7286, | |
| "cumulative": -1.1314, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.7286, | |
| "cumulative": -1.1314, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.7286, | |
| "cumulative": -1.1314, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.7286, | |
| "cumulative": -1.1314, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "expert_brief", | |
| "policy": "oracle_router", | |
| "use_rag": false, | |
| "terminal": { | |
| "mean": 0.8827, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 1.2827, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2827, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2827, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2827, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2827, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.8827, | |
| "cumulative": 1.2827, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "expert_brief", | |
| "policy": "base_naive", | |
| "use_rag": true, | |
| "terminal": { | |
| "mean": 0.3217, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 0.1917, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "expert_brief", | |
| "policy": "base_roundrobin", | |
| "use_rag": true, | |
| "terminal": { | |
| "mean": 0.8924999999999998, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 1.2925, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.8925, | |
| "cumulative": 1.2925, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.8925, | |
| "cumulative": 1.2925, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.8925, | |
| "cumulative": 1.2925, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.8925, | |
| "cumulative": 1.2925, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.8925, | |
| "cumulative": 1.2925, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "expert_brief", | |
| "policy": "trained_mlp", | |
| "use_rag": true, | |
| "terminal": { | |
| "mean": 0.7429, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": -1.1171, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.7429, | |
| "cumulative": -1.1171, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.7429, | |
| "cumulative": -1.1171, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.7429, | |
| "cumulative": -1.1171, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.7429, | |
| "cumulative": -1.1171, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.7429, | |
| "cumulative": -1.1171, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "expert_brief", | |
| "policy": "oracle_router", | |
| "use_rag": true, | |
| "terminal": { | |
| "mean": 0.8924999999999998, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 1.2925, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.8925, | |
| "cumulative": 1.2925, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.8925, | |
| "cumulative": 1.2925, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.8925, | |
| "cumulative": 1.2925, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.8925, | |
| "cumulative": 1.2925, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.8925, | |
| "cumulative": 1.2925, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "crisis_brief", | |
| "policy": "base_naive", | |
| "use_rag": false, | |
| "terminal": { | |
| "mean": 0.2646, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 0.1346, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.2646, | |
| "cumulative": 0.1346, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "crisis_brief", | |
| "policy": "base_roundrobin", | |
| "use_rag": false, | |
| "terminal": { | |
| "mean": 0.8805, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 1.2805, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.8805, | |
| "cumulative": 1.2805, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.8805, | |
| "cumulative": 1.2805, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.8805, | |
| "cumulative": 1.2805, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.8805, | |
| "cumulative": 1.2805, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.8805, | |
| "cumulative": 1.2805, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "crisis_brief", | |
| "policy": "trained_mlp", | |
| "use_rag": false, | |
| "terminal": { | |
| "mean": 0.7286, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": -1.1314, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.7286, | |
| "cumulative": -1.1314, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.7286, | |
| "cumulative": -1.1314, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.7286, | |
| "cumulative": -1.1314, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.7286, | |
| "cumulative": -1.1314, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.7286, | |
| "cumulative": -1.1314, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "crisis_brief", | |
| "policy": "oracle_router", | |
| "use_rag": false, | |
| "terminal": { | |
| "mean": 0.8805, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 1.2805, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.8805, | |
| "cumulative": 1.2805, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.8805, | |
| "cumulative": 1.2805, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.8805, | |
| "cumulative": 1.2805, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.8805, | |
| "cumulative": 1.2805, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.8805, | |
| "cumulative": 1.2805, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "crisis_brief", | |
| "policy": "base_naive", | |
| "use_rag": true, | |
| "terminal": { | |
| "mean": 0.3217, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 0.1917, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.3217, | |
| "cumulative": 0.1917, | |
| "steps": 3, | |
| "consulted": [ | |
| "analyst" | |
| ], | |
| "path": [ | |
| "analyst" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "crisis_brief", | |
| "policy": "base_roundrobin", | |
| "use_rag": true, | |
| "terminal": { | |
| "mean": 0.8914, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 1.2914, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.8914, | |
| "cumulative": 1.2914, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.8914, | |
| "cumulative": 1.2914, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.8914, | |
| "cumulative": 1.2914, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.8914, | |
| "cumulative": 1.2914, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.8914, | |
| "cumulative": 1.2914, | |
| "steps": 6, | |
| "consulted": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "path": [ | |
| "finance", | |
| "analyst", | |
| "hr", | |
| "strategy" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "crisis_brief", | |
| "policy": "trained_mlp", | |
| "use_rag": true, | |
| "terminal": { | |
| "mean": 0.7429, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": -1.1171, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.7429, | |
| "cumulative": -1.1171, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.7429, | |
| "cumulative": -1.1171, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.7429, | |
| "cumulative": -1.1171, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.7429, | |
| "cumulative": -1.1171, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.7429, | |
| "cumulative": -1.1171, | |
| "steps": 14, | |
| "consulted": [ | |
| "analyst", | |
| "strategy", | |
| "finance" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance", | |
| "finance" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| }, | |
| { | |
| "task": "crisis_brief", | |
| "policy": "oracle_router", | |
| "use_rag": true, | |
| "terminal": { | |
| "mean": 0.8914, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "cumulative": { | |
| "mean": 1.2914, | |
| "std": 0.0, | |
| "n": 5 | |
| }, | |
| "runs": [ | |
| { | |
| "seed": 11, | |
| "terminal": 0.8914, | |
| "cumulative": 1.2914, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 23, | |
| "terminal": 0.8914, | |
| "cumulative": 1.2914, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 47, | |
| "terminal": 0.8914, | |
| "cumulative": 1.2914, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 91, | |
| "terminal": 0.8914, | |
| "cumulative": 1.2914, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| }, | |
| { | |
| "seed": 137, | |
| "terminal": 0.8914, | |
| "cumulative": 1.2914, | |
| "steps": 6, | |
| "consulted": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "path": [ | |
| "analyst", | |
| "finance", | |
| "strategy", | |
| "hr" | |
| ], | |
| "submitted": true | |
| } | |
| ] | |
| } | |
| ] | |
| } |