[ { "method": "sft", "task": "expert_brief", "rag": false, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2827, "recorded_terminal": 0.8827, "replay_terminal": 0.8827, "consulted": "analyst, finance, strategy, hr" }, { "method": "sft", "task": "risk_brief", "rag": false, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2839, "recorded_terminal": 0.8839, "replay_terminal": 0.8839, "consulted": "analyst, finance, strategy, hr" }, { "method": "sft", "task": "crisis_brief", "rag": false, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2805, "recorded_terminal": 0.8805, "replay_terminal": 0.8805, "consulted": "analyst, finance, strategy, hr" }, { "method": "sft", "task": "expert_brief", "rag": true, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2925, "recorded_terminal": 0.8925, "replay_terminal": 0.8925, "consulted": "analyst, finance, strategy, hr" }, { "method": "sft", "task": "risk_brief", "rag": true, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2948, "recorded_terminal": 0.8948, "replay_terminal": 0.8948, "consulted": "analyst, finance, strategy, hr" }, { "method": "sft", "task": "crisis_brief", "rag": true, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2914, "recorded_terminal": 0.8914, "replay_terminal": 0.8914, "consulted": "analyst, finance, strategy, hr" }, { "method": "dpo", "task": "expert_brief", "rag": false, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy", "fallback": "consult:hr -> summarize -> submit", "auto_finish": "", "recorded_policy_reward": -0.26, "recorded_terminal": 0.8827, "replay_terminal": 0.8827, "consulted": "analyst, finance, strategy, hr" }, { "method": "dpo", "task": "risk_brief", "rag": false, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy", "fallback": "consult:hr -> summarize -> submit", "auto_finish": "", "recorded_policy_reward": -0.26, "recorded_terminal": 0.8839, "replay_terminal": 0.8839, "consulted": "analyst, finance, strategy, hr" }, { "method": "dpo", "task": "crisis_brief", "rag": false, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy", "fallback": "consult:hr -> summarize -> submit", "auto_finish": "", "recorded_policy_reward": -0.26, "recorded_terminal": 0.8805, "replay_terminal": 0.8805, "consulted": "analyst, finance, strategy, hr" }, { "method": "dpo", "task": "expert_brief", "rag": true, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy", "fallback": "consult:hr -> summarize -> submit", "auto_finish": "", "recorded_policy_reward": -0.26, "recorded_terminal": 0.8925, "replay_terminal": 0.8925, "consulted": "analyst, finance, strategy, hr" }, { "method": "dpo", "task": "risk_brief", "rag": true, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy", "fallback": "consult:hr -> summarize -> submit", "auto_finish": "", "recorded_policy_reward": -0.26, "recorded_terminal": 0.8948, "replay_terminal": 0.8948, "consulted": "analyst, finance, strategy, hr" }, { "method": "dpo", "task": "crisis_brief", "rag": true, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy", "fallback": "consult:hr -> summarize -> submit", "auto_finish": "", "recorded_policy_reward": -0.26, "recorded_terminal": 0.8914, "replay_terminal": 0.8914, "consulted": "analyst, finance, strategy, hr" }, { "method": "sft_dpo", "task": "expert_brief", "rag": false, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2827, "recorded_terminal": 0.8827, "replay_terminal": 0.8827, "consulted": "analyst, finance, strategy, hr" }, { "method": "sft_dpo", "task": "risk_brief", "rag": false, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2839, "recorded_terminal": 0.8839, "replay_terminal": 0.8839, "consulted": "analyst, finance, strategy, hr" }, { "method": "sft_dpo", "task": "crisis_brief", "rag": false, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2805, "recorded_terminal": 0.8805, "replay_terminal": 0.8805, "consulted": "analyst, finance, strategy, hr" }, { "method": "sft_dpo", "task": "expert_brief", "rag": true, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2925, "recorded_terminal": 0.8925, "replay_terminal": 0.8925, "consulted": "analyst, finance, strategy, hr" }, { "method": "sft_dpo", "task": "risk_brief", "rag": true, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2948, "recorded_terminal": 0.8948, "replay_terminal": 0.8948, "consulted": "analyst, finance, strategy, hr" }, { "method": "sft_dpo", "task": "crisis_brief", "rag": true, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2914, "recorded_terminal": 0.8914, "replay_terminal": 0.8914, "consulted": "analyst, finance, strategy, hr" }, { "method": "grpo_rlvr", "task": "expert_brief", "rag": false, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2827, "recorded_terminal": 0.8827, "replay_terminal": 0.8827, "consulted": "analyst, finance, strategy, hr" }, { "method": "grpo_rlvr", "task": "risk_brief", "rag": false, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2839, "recorded_terminal": 0.8839, "replay_terminal": 0.8839, "consulted": "analyst, finance, strategy, hr" }, { "method": "grpo_rlvr", "task": "crisis_brief", "rag": false, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2805, "recorded_terminal": 0.8805, "replay_terminal": 0.8805, "consulted": "analyst, finance, strategy, hr" }, { "method": "grpo_rlvr", "task": "expert_brief", "rag": true, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2925, "recorded_terminal": 0.8925, "replay_terminal": 0.8925, "consulted": "analyst, finance, strategy, hr" }, { "method": "grpo_rlvr", "task": "risk_brief", "rag": true, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2948, "recorded_terminal": 0.8948, "replay_terminal": 0.8948, "consulted": "analyst, finance, strategy, hr" }, { "method": "grpo_rlvr", "task": "crisis_brief", "rag": true, "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", "fallback": "", "auto_finish": "", "recorded_policy_reward": 1.2914, "recorded_terminal": 0.8914, "replay_terminal": 0.8914, "consulted": "analyst, finance, strategy, hr" } ]