Spaces:
Sleeping
Sleeping
| [ | |
| { | |
| "method": "sft", | |
| "task": "expert_brief", | |
| "rag": false, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2827, | |
| "recorded_terminal": 0.8827, | |
| "replay_terminal": 0.8827, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "sft", | |
| "task": "risk_brief", | |
| "rag": false, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2839, | |
| "recorded_terminal": 0.8839, | |
| "replay_terminal": 0.8839, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "sft", | |
| "task": "crisis_brief", | |
| "rag": false, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2805, | |
| "recorded_terminal": 0.8805, | |
| "replay_terminal": 0.8805, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "sft", | |
| "task": "expert_brief", | |
| "rag": true, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2925, | |
| "recorded_terminal": 0.8925, | |
| "replay_terminal": 0.8925, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "sft", | |
| "task": "risk_brief", | |
| "rag": true, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2948, | |
| "recorded_terminal": 0.8948, | |
| "replay_terminal": 0.8948, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "sft", | |
| "task": "crisis_brief", | |
| "rag": true, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2914, | |
| "recorded_terminal": 0.8914, | |
| "replay_terminal": 0.8914, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "dpo", | |
| "task": "expert_brief", | |
| "rag": false, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy", | |
| "fallback": "consult:hr -> summarize -> submit", | |
| "auto_finish": "", | |
| "recorded_policy_reward": -0.26, | |
| "recorded_terminal": 0.8827, | |
| "replay_terminal": 0.8827, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "dpo", | |
| "task": "risk_brief", | |
| "rag": false, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy", | |
| "fallback": "consult:hr -> summarize -> submit", | |
| "auto_finish": "", | |
| "recorded_policy_reward": -0.26, | |
| "recorded_terminal": 0.8839, | |
| "replay_terminal": 0.8839, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "dpo", | |
| "task": "crisis_brief", | |
| "rag": false, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy", | |
| "fallback": "consult:hr -> summarize -> submit", | |
| "auto_finish": "", | |
| "recorded_policy_reward": -0.26, | |
| "recorded_terminal": 0.8805, | |
| "replay_terminal": 0.8805, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "dpo", | |
| "task": "expert_brief", | |
| "rag": true, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy", | |
| "fallback": "consult:hr -> summarize -> submit", | |
| "auto_finish": "", | |
| "recorded_policy_reward": -0.26, | |
| "recorded_terminal": 0.8925, | |
| "replay_terminal": 0.8925, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "dpo", | |
| "task": "risk_brief", | |
| "rag": true, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy", | |
| "fallback": "consult:hr -> summarize -> submit", | |
| "auto_finish": "", | |
| "recorded_policy_reward": -0.26, | |
| "recorded_terminal": 0.8948, | |
| "replay_terminal": 0.8948, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "dpo", | |
| "task": "crisis_brief", | |
| "rag": true, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy", | |
| "fallback": "consult:hr -> summarize -> submit", | |
| "auto_finish": "", | |
| "recorded_policy_reward": -0.26, | |
| "recorded_terminal": 0.8914, | |
| "replay_terminal": 0.8914, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "sft_dpo", | |
| "task": "expert_brief", | |
| "rag": false, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2827, | |
| "recorded_terminal": 0.8827, | |
| "replay_terminal": 0.8827, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "sft_dpo", | |
| "task": "risk_brief", | |
| "rag": false, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2839, | |
| "recorded_terminal": 0.8839, | |
| "replay_terminal": 0.8839, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "sft_dpo", | |
| "task": "crisis_brief", | |
| "rag": false, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2805, | |
| "recorded_terminal": 0.8805, | |
| "replay_terminal": 0.8805, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "sft_dpo", | |
| "task": "expert_brief", | |
| "rag": true, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2925, | |
| "recorded_terminal": 0.8925, | |
| "replay_terminal": 0.8925, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "sft_dpo", | |
| "task": "risk_brief", | |
| "rag": true, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2948, | |
| "recorded_terminal": 0.8948, | |
| "replay_terminal": 0.8948, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "sft_dpo", | |
| "task": "crisis_brief", | |
| "rag": true, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2914, | |
| "recorded_terminal": 0.8914, | |
| "replay_terminal": 0.8914, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "grpo_rlvr", | |
| "task": "expert_brief", | |
| "rag": false, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2827, | |
| "recorded_terminal": 0.8827, | |
| "replay_terminal": 0.8827, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "grpo_rlvr", | |
| "task": "risk_brief", | |
| "rag": false, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2839, | |
| "recorded_terminal": 0.8839, | |
| "replay_terminal": 0.8839, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "grpo_rlvr", | |
| "task": "crisis_brief", | |
| "rag": false, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2805, | |
| "recorded_terminal": 0.8805, | |
| "replay_terminal": 0.8805, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "grpo_rlvr", | |
| "task": "expert_brief", | |
| "rag": true, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2925, | |
| "recorded_terminal": 0.8925, | |
| "replay_terminal": 0.8925, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "grpo_rlvr", | |
| "task": "risk_brief", | |
| "rag": true, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2948, | |
| "recorded_terminal": 0.8948, | |
| "replay_terminal": 0.8948, | |
| "consulted": "analyst, finance, strategy, hr" | |
| }, | |
| { | |
| "method": "grpo_rlvr", | |
| "task": "crisis_brief", | |
| "rag": true, | |
| "model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit", | |
| "fallback": "", | |
| "auto_finish": "", | |
| "recorded_policy_reward": 1.2914, | |
| "recorded_terminal": 0.8914, | |
| "replay_terminal": 0.8914, | |
| "consulted": "analyst, finance, strategy, hr" | |
| } | |
| ] |