AutoDataLab2.0 / training /evidence /sft /evidence.json
uchihamadara1816's picture
Upload 172 files
d02bacd verified
[
{
"task": "expert_brief",
"rag": false,
"action_sequence": [
"consult:analyst",
"consult:finance",
"consult:strategy",
"consult:hr",
"summarize",
"submit"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
],
"required_experts": [
"analyst",
"finance",
"strategy",
"hr"
],
"fallback": [],
"needed_fallback": false,
"policy_reward": 1.2827,
"total_reward": 1.2827,
"terminal_score": 0.8827,
"trace": [
{
"step": 1,
"action": {
"action_type": "consult",
"expert_id": "analyst"
},
"action_label": "consult:analyst",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"analyst\"}",
"reward": 0.08,
"consulted_after": [
"analyst"
],
"model_routed_required": [
"analyst"
]
},
{
"step": 2,
"action": {
"action_type": "consult",
"expert_id": "finance"
},
"action_label": "consult:finance",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"finance\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance"
],
"model_routed_required": [
"analyst",
"finance"
]
},
{
"step": 3,
"action": {
"action_type": "consult",
"expert_id": "strategy"
},
"action_label": "consult:strategy",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"strategy\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance",
"strategy"
],
"model_routed_required": [
"analyst",
"finance",
"strategy"
]
},
{
"step": 4,
"action": {
"action_type": "consult",
"expert_id": "hr"
},
"action_label": "consult:hr",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"hr\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
},
{
"step": 5,
"action": {
"action_type": "summarize"
},
"action_label": "summarize",
"completion_preview": "{\"action_type\":\"summarize\"}",
"reward": 0.02,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
},
{
"step": 6,
"action": {
"action_type": "submit"
},
"action_label": "submit",
"completion_preview": "{\"action_type\":\"submit\"}",
"reward": 0.9427,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
}
]
},
{
"task": "risk_brief",
"rag": false,
"action_sequence": [
"consult:analyst",
"consult:finance",
"consult:strategy",
"consult:hr",
"summarize",
"submit"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
],
"required_experts": [
"analyst",
"finance",
"strategy",
"hr"
],
"fallback": [],
"needed_fallback": false,
"policy_reward": 1.2839,
"total_reward": 1.2839,
"terminal_score": 0.8839,
"trace": [
{
"step": 1,
"action": {
"action_type": "consult",
"expert_id": "analyst"
},
"action_label": "consult:analyst",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"analyst\"}",
"reward": 0.08,
"consulted_after": [
"analyst"
],
"model_routed_required": [
"analyst"
]
},
{
"step": 2,
"action": {
"action_type": "consult",
"expert_id": "finance"
},
"action_label": "consult:finance",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"finance\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance"
],
"model_routed_required": [
"analyst",
"finance"
]
},
{
"step": 3,
"action": {
"action_type": "consult",
"expert_id": "strategy"
},
"action_label": "consult:strategy",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"strategy\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance",
"strategy"
],
"model_routed_required": [
"analyst",
"finance",
"strategy"
]
},
{
"step": 4,
"action": {
"action_type": "consult",
"expert_id": "hr"
},
"action_label": "consult:hr",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"hr\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
},
{
"step": 5,
"action": {
"action_type": "summarize"
},
"action_label": "summarize",
"completion_preview": "{\"action_type\":\"summarize\"}",
"reward": 0.02,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
},
{
"step": 6,
"action": {
"action_type": "submit"
},
"action_label": "submit",
"completion_preview": "{\"action_type\":\"submit\"}",
"reward": 0.9439,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
}
]
},
{
"task": "crisis_brief",
"rag": false,
"action_sequence": [
"consult:analyst",
"consult:finance",
"consult:strategy",
"consult:hr",
"summarize",
"submit"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
],
"required_experts": [
"analyst",
"finance",
"strategy",
"hr"
],
"fallback": [],
"needed_fallback": false,
"policy_reward": 1.2805,
"total_reward": 1.2805,
"terminal_score": 0.8805,
"trace": [
{
"step": 1,
"action": {
"action_type": "consult",
"expert_id": "analyst"
},
"action_label": "consult:analyst",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"analyst\"}",
"reward": 0.08,
"consulted_after": [
"analyst"
],
"model_routed_required": [
"analyst"
]
},
{
"step": 2,
"action": {
"action_type": "consult",
"expert_id": "finance"
},
"action_label": "consult:finance",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"finance\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance"
],
"model_routed_required": [
"analyst",
"finance"
]
},
{
"step": 3,
"action": {
"action_type": "consult",
"expert_id": "strategy"
},
"action_label": "consult:strategy",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"strategy\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance",
"strategy"
],
"model_routed_required": [
"analyst",
"finance",
"strategy"
]
},
{
"step": 4,
"action": {
"action_type": "consult",
"expert_id": "hr"
},
"action_label": "consult:hr",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"hr\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
},
{
"step": 5,
"action": {
"action_type": "summarize"
},
"action_label": "summarize",
"completion_preview": "{\"action_type\":\"summarize\"}",
"reward": 0.02,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
},
{
"step": 6,
"action": {
"action_type": "submit"
},
"action_label": "submit",
"completion_preview": "{\"action_type\":\"submit\"}",
"reward": 0.9405,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
}
]
},
{
"task": "expert_brief",
"rag": true,
"action_sequence": [
"consult:analyst",
"consult:finance",
"consult:strategy",
"consult:hr",
"summarize",
"submit"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
],
"required_experts": [
"analyst",
"finance",
"strategy",
"hr"
],
"fallback": [],
"needed_fallback": false,
"policy_reward": 1.2925,
"total_reward": 1.2925,
"terminal_score": 0.8925,
"trace": [
{
"step": 1,
"action": {
"action_type": "consult",
"expert_id": "analyst"
},
"action_label": "consult:analyst",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"analyst\"}",
"reward": 0.08,
"consulted_after": [
"analyst"
],
"model_routed_required": [
"analyst"
]
},
{
"step": 2,
"action": {
"action_type": "consult",
"expert_id": "finance"
},
"action_label": "consult:finance",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"finance\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance"
],
"model_routed_required": [
"analyst",
"finance"
]
},
{
"step": 3,
"action": {
"action_type": "consult",
"expert_id": "strategy"
},
"action_label": "consult:strategy",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"strategy\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance",
"strategy"
],
"model_routed_required": [
"analyst",
"finance",
"strategy"
]
},
{
"step": 4,
"action": {
"action_type": "consult",
"expert_id": "hr"
},
"action_label": "consult:hr",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"hr\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
},
{
"step": 5,
"action": {
"action_type": "summarize"
},
"action_label": "summarize",
"completion_preview": "{\"action_type\":\"summarize\"}",
"reward": 0.02,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
},
{
"step": 6,
"action": {
"action_type": "submit"
},
"action_label": "submit",
"completion_preview": "{\"action_type\":\"submit\"}",
"reward": 0.9525,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
}
]
},
{
"task": "risk_brief",
"rag": true,
"action_sequence": [
"consult:analyst",
"consult:finance",
"consult:strategy",
"consult:hr",
"summarize",
"submit"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
],
"required_experts": [
"analyst",
"finance",
"strategy",
"hr"
],
"fallback": [],
"needed_fallback": false,
"policy_reward": 1.2948,
"total_reward": 1.2948,
"terminal_score": 0.8948,
"trace": [
{
"step": 1,
"action": {
"action_type": "consult",
"expert_id": "analyst"
},
"action_label": "consult:analyst",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"analyst\"}",
"reward": 0.08,
"consulted_after": [
"analyst"
],
"model_routed_required": [
"analyst"
]
},
{
"step": 2,
"action": {
"action_type": "consult",
"expert_id": "finance"
},
"action_label": "consult:finance",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"finance\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance"
],
"model_routed_required": [
"analyst",
"finance"
]
},
{
"step": 3,
"action": {
"action_type": "consult",
"expert_id": "strategy"
},
"action_label": "consult:strategy",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"strategy\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance",
"strategy"
],
"model_routed_required": [
"analyst",
"finance",
"strategy"
]
},
{
"step": 4,
"action": {
"action_type": "consult",
"expert_id": "hr"
},
"action_label": "consult:hr",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"hr\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
},
{
"step": 5,
"action": {
"action_type": "summarize"
},
"action_label": "summarize",
"completion_preview": "{\"action_type\":\"summarize\"}",
"reward": 0.02,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
},
{
"step": 6,
"action": {
"action_type": "submit"
},
"action_label": "submit",
"completion_preview": "{\"action_type\":\"submit\"}",
"reward": 0.9548,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
}
]
},
{
"task": "crisis_brief",
"rag": true,
"action_sequence": [
"consult:analyst",
"consult:finance",
"consult:strategy",
"consult:hr",
"summarize",
"submit"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
],
"required_experts": [
"analyst",
"finance",
"strategy",
"hr"
],
"fallback": [],
"needed_fallback": false,
"policy_reward": 1.2914,
"total_reward": 1.2914,
"terminal_score": 0.8914,
"trace": [
{
"step": 1,
"action": {
"action_type": "consult",
"expert_id": "analyst"
},
"action_label": "consult:analyst",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"analyst\"}",
"reward": 0.08,
"consulted_after": [
"analyst"
],
"model_routed_required": [
"analyst"
]
},
{
"step": 2,
"action": {
"action_type": "consult",
"expert_id": "finance"
},
"action_label": "consult:finance",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"finance\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance"
],
"model_routed_required": [
"analyst",
"finance"
]
},
{
"step": 3,
"action": {
"action_type": "consult",
"expert_id": "strategy"
},
"action_label": "consult:strategy",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"strategy\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance",
"strategy"
],
"model_routed_required": [
"analyst",
"finance",
"strategy"
]
},
{
"step": 4,
"action": {
"action_type": "consult",
"expert_id": "hr"
},
"action_label": "consult:hr",
"completion_preview": "{\"action_type\":\"consult\",\"expert_id\":\"hr\"}",
"reward": 0.08,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
},
{
"step": 5,
"action": {
"action_type": "summarize"
},
"action_label": "summarize",
"completion_preview": "{\"action_type\":\"summarize\"}",
"reward": 0.02,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
},
{
"step": 6,
"action": {
"action_type": "submit"
},
"action_label": "submit",
"completion_preview": "{\"action_type\":\"submit\"}",
"reward": 0.9514,
"consulted_after": [
"analyst",
"finance",
"strategy",
"hr"
],
"model_routed_required": [
"analyst",
"finance",
"strategy",
"hr"
]
}
]
}
]