File size: 2,028 Bytes
4058302 c3648b5 4058302 c3648b5 4058302 c3648b5 4058302 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | {
"base_model": "Qwen/Qwen2.5-1.5B-Instruct",
"dataset_rows": 680,
"episodes_per_task": 8,
"random_rewards": [
-5.96,
-11.48,
-12.5
],
"heuristic_rewards": [
-4.72,
-0.87,
5.89
],
"base_model_rewards": [
-2.92,
-4.0,
-4.28
],
"sft_model_rewards": [
-4.72,
-0.87,
5.89
],
"improvement_sft_over_base": [
-1.8,
3.13,
10.17
],
"improvement_heuristic_over_random": [
1.24,
10.61,
18.39
],
"reward_components_by_policy": {
"random": {
"wrong_actor_penalty": -3.12,
"closure_wrong": -17.82,
"step_cost": -2.61,
"postmortem_empty": -1.0,
"escalation_not_needed": -0.3,
"clue_bonus": 0.48,
"handoff_wrong": -0.8,
"mitigation_wrong": -2.1,
"rollback_ineffective": -1.65,
"sla_exhausted": -1.2,
"repeated_lookup_penalty": -0.02,
"escalation_needed": 0.2
},
"heuristic": {
"step_cost": -2.02,
"clue_bonus": 2.52,
"handoff_wrong": -0.8,
"mitigation_wrong": -2.1,
"closure_wrong": -9.9,
"repeated_lookup_penalty": -0.16,
"handoff_correct": 0.75,
"postmortem_logged": 0.35,
"mitigation_correct": 2.1,
"closure_correct": 7.36,
"closure_mitigation_bonus": 1.8,
"speed_bonus": 0.6,
"postmortem_bonus": 0.6,
"closure_under_investigated": -0.8
},
"base_model": {
"step_cost": -5.16,
"clue_bonus": 0.24,
"repeated_lookup_penalty": -1.24,
"sla_exhausted": -5.04
},
"sft_model": {
"step_cost": -2.02,
"clue_bonus": 2.52,
"handoff_wrong": -0.8,
"mitigation_wrong": -2.1,
"closure_wrong": -9.9,
"repeated_lookup_penalty": -0.16,
"handoff_correct": 0.75,
"postmortem_logged": 0.35,
"mitigation_correct": 2.1,
"closure_correct": 7.36,
"closure_mitigation_bonus": 1.8,
"speed_bonus": 0.6,
"postmortem_bonus": 0.6,
"closure_under_investigated": -0.8
}
}
} |