File size: 2,028 Bytes
4058302
c3648b5
 
 
4058302
c3648b5
 
 
4058302
 
c3648b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4058302
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
{
  "base_model": "Qwen/Qwen2.5-1.5B-Instruct",
  "dataset_rows": 680,
  "episodes_per_task": 8,
  "random_rewards": [
    -5.96,
    -11.48,
    -12.5
  ],
  "heuristic_rewards": [
    -4.72,
    -0.87,
    5.89
  ],
  "base_model_rewards": [
    -2.92,
    -4.0,
    -4.28
  ],
  "sft_model_rewards": [
    -4.72,
    -0.87,
    5.89
  ],
  "improvement_sft_over_base": [
    -1.8,
    3.13,
    10.17
  ],
  "improvement_heuristic_over_random": [
    1.24,
    10.61,
    18.39
  ],
  "reward_components_by_policy": {
    "random": {
      "wrong_actor_penalty": -3.12,
      "closure_wrong": -17.82,
      "step_cost": -2.61,
      "postmortem_empty": -1.0,
      "escalation_not_needed": -0.3,
      "clue_bonus": 0.48,
      "handoff_wrong": -0.8,
      "mitigation_wrong": -2.1,
      "rollback_ineffective": -1.65,
      "sla_exhausted": -1.2,
      "repeated_lookup_penalty": -0.02,
      "escalation_needed": 0.2
    },
    "heuristic": {
      "step_cost": -2.02,
      "clue_bonus": 2.52,
      "handoff_wrong": -0.8,
      "mitigation_wrong": -2.1,
      "closure_wrong": -9.9,
      "repeated_lookup_penalty": -0.16,
      "handoff_correct": 0.75,
      "postmortem_logged": 0.35,
      "mitigation_correct": 2.1,
      "closure_correct": 7.36,
      "closure_mitigation_bonus": 1.8,
      "speed_bonus": 0.6,
      "postmortem_bonus": 0.6,
      "closure_under_investigated": -0.8
    },
    "base_model": {
      "step_cost": -5.16,
      "clue_bonus": 0.24,
      "repeated_lookup_penalty": -1.24,
      "sla_exhausted": -5.04
    },
    "sft_model": {
      "step_cost": -2.02,
      "clue_bonus": 2.52,
      "handoff_wrong": -0.8,
      "mitigation_wrong": -2.1,
      "closure_wrong": -9.9,
      "repeated_lookup_penalty": -0.16,
      "handoff_correct": 0.75,
      "postmortem_logged": 0.35,
      "mitigation_correct": 2.1,
      "closure_correct": 7.36,
      "closure_mitigation_bonus": 1.8,
      "speed_bonus": 0.6,
      "postmortem_bonus": 0.6,
      "closure_under_investigated": -0.8
    }
  }
}