uchihamadara1816's picture
Upload 172 files
d02bacd verified
[
{
"method": "sft",
"task": "expert_brief",
"rag": false,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2827,
"recorded_terminal": 0.8827,
"replay_terminal": 0.8827,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "sft",
"task": "risk_brief",
"rag": false,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2839,
"recorded_terminal": 0.8839,
"replay_terminal": 0.8839,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "sft",
"task": "crisis_brief",
"rag": false,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2805,
"recorded_terminal": 0.8805,
"replay_terminal": 0.8805,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "sft",
"task": "expert_brief",
"rag": true,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2925,
"recorded_terminal": 0.8925,
"replay_terminal": 0.8925,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "sft",
"task": "risk_brief",
"rag": true,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2948,
"recorded_terminal": 0.8948,
"replay_terminal": 0.8948,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "sft",
"task": "crisis_brief",
"rag": true,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2914,
"recorded_terminal": 0.8914,
"replay_terminal": 0.8914,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "dpo",
"task": "expert_brief",
"rag": false,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy",
"fallback": "consult:hr -> summarize -> submit",
"auto_finish": "",
"recorded_policy_reward": -0.26,
"recorded_terminal": 0.8827,
"replay_terminal": 0.8827,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "dpo",
"task": "risk_brief",
"rag": false,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy",
"fallback": "consult:hr -> summarize -> submit",
"auto_finish": "",
"recorded_policy_reward": -0.26,
"recorded_terminal": 0.8839,
"replay_terminal": 0.8839,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "dpo",
"task": "crisis_brief",
"rag": false,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy",
"fallback": "consult:hr -> summarize -> submit",
"auto_finish": "",
"recorded_policy_reward": -0.26,
"recorded_terminal": 0.8805,
"replay_terminal": 0.8805,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "dpo",
"task": "expert_brief",
"rag": true,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy",
"fallback": "consult:hr -> summarize -> submit",
"auto_finish": "",
"recorded_policy_reward": -0.26,
"recorded_terminal": 0.8925,
"replay_terminal": 0.8925,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "dpo",
"task": "risk_brief",
"rag": true,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy",
"fallback": "consult:hr -> summarize -> submit",
"auto_finish": "",
"recorded_policy_reward": -0.26,
"recorded_terminal": 0.8948,
"replay_terminal": 0.8948,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "dpo",
"task": "crisis_brief",
"rag": true,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:strategy -> consult:strategy -> consult:strategy",
"fallback": "consult:hr -> summarize -> submit",
"auto_finish": "",
"recorded_policy_reward": -0.26,
"recorded_terminal": 0.8914,
"replay_terminal": 0.8914,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "sft_dpo",
"task": "expert_brief",
"rag": false,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2827,
"recorded_terminal": 0.8827,
"replay_terminal": 0.8827,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "sft_dpo",
"task": "risk_brief",
"rag": false,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2839,
"recorded_terminal": 0.8839,
"replay_terminal": 0.8839,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "sft_dpo",
"task": "crisis_brief",
"rag": false,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2805,
"recorded_terminal": 0.8805,
"replay_terminal": 0.8805,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "sft_dpo",
"task": "expert_brief",
"rag": true,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2925,
"recorded_terminal": 0.8925,
"replay_terminal": 0.8925,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "sft_dpo",
"task": "risk_brief",
"rag": true,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2948,
"recorded_terminal": 0.8948,
"replay_terminal": 0.8948,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "sft_dpo",
"task": "crisis_brief",
"rag": true,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2914,
"recorded_terminal": 0.8914,
"replay_terminal": 0.8914,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "grpo_rlvr",
"task": "expert_brief",
"rag": false,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2827,
"recorded_terminal": 0.8827,
"replay_terminal": 0.8827,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "grpo_rlvr",
"task": "risk_brief",
"rag": false,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2839,
"recorded_terminal": 0.8839,
"replay_terminal": 0.8839,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "grpo_rlvr",
"task": "crisis_brief",
"rag": false,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2805,
"recorded_terminal": 0.8805,
"replay_terminal": 0.8805,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "grpo_rlvr",
"task": "expert_brief",
"rag": true,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2925,
"recorded_terminal": 0.8925,
"replay_terminal": 0.8925,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "grpo_rlvr",
"task": "risk_brief",
"rag": true,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2948,
"recorded_terminal": 0.8948,
"replay_terminal": 0.8948,
"consulted": "analyst, finance, strategy, hr"
},
{
"method": "grpo_rlvr",
"task": "crisis_brief",
"rag": true,
"model_route": "consult:analyst -> consult:finance -> consult:strategy -> consult:hr -> summarize -> submit",
"fallback": "",
"auto_finish": "",
"recorded_policy_reward": 1.2914,
"recorded_terminal": 0.8914,
"replay_terminal": 0.8914,
"consulted": "analyst, finance, strategy, hr"
}
]