adityss committed on
Commit
f1bfee9
·
1 Parent(s): cb1f26a

feat: implement reward system logic in new rewards module

Browse files
Files changed (1) hide show
  1. env/rewards.go +8 -8
env/rewards.go CHANGED
@@ -31,10 +31,10 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
31
  rc := RewardComponents{}
32
 
33
  // ── 1. Cost Savings ─────────────────────────────────────────────────────
34
- // Negative reward proportional to energy cost. Normalised by typical step cost.
35
- // Typical step cost at full load, peak price: 50kW * 0.25h * 0.32 = $4.00.
36
  typicalCost := 4.0
37
- rc.CostSavings = -(inp.StepCost / typicalCost) * 2.0
38
 
39
  // ── 2. Temperature Constraint ────────────────────────────────────────────
40
  // Only active for task 2 and 3.
@@ -75,12 +75,12 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
75
  }
76
 
77
  // ── 7. Carbon Reward ─────────────────────────────────────────────────────
78
- // Low-carbon bonus: active for task 3 (and optional overlay on others).
79
  if inp.TaskID >= 3 {
80
  // Normalise carbon: iso-ne range roughly 100–700 gCO2/kWh
81
  carbonNorm := (inp.B.CarbonIntensity - 100.0) / 600.0
82
- // Reward for reducing energy during high-carbon periods
83
- rc.CarbonReward = -inp.EnergyKWh * carbonNorm * 0.3
84
  }
85
 
86
  // ── Aggregate ────────────────────────────────────────────────────────────
@@ -97,11 +97,11 @@ func computeTempReward(temp, setpoint, tMin, tMax float64) float64 {
97
  // Gaussian-shaped bonus: maximum at setpoint, degrades toward bounds
98
  deviation := math.Abs(temp - setpoint)
99
  sigma := (tMax - tMin) / 4.0
100
- return math.Exp(-0.5*(deviation/sigma)*(deviation/sigma)) * 0.5
101
  }
102
  // Outside bounds: proportional penalty
103
  excess := math.Max(temp-tMax, tMin-temp)
104
- return -excess * 0.4
105
  }
106
 
107
  // computeGridResponse returns a bonus for shedding load during high grid stress,
 
31
  rc := RewardComponents{}
32
 
33
  // ── 1. Cost Savings ─────────────────────────────────────────────────────
34
+ // Shift from pure penalty to a positive baseline: standardizing operations gives positive reward.
35
+ // Baseline reward of 1.5, minus twice the step cost relative to typicalCost.
36
  typicalCost := 4.0
37
+ rc.CostSavings = 1.5 - (inp.StepCost / typicalCost) * 2.0
38
 
39
  // ── 2. Temperature Constraint ────────────────────────────────────────────
40
  // Only active for task 2 and 3.
 
75
  }
76
 
77
  // ── 7. Carbon Reward ─────────────────────────────────────────────────────
78
+ // Low-carbon bonus: active when TaskID >= 3 (task 3).
79
  if inp.TaskID >= 3 {
80
  // Normalise carbon: iso-ne range roughly 100–700 gCO2/kWh
81
  carbonNorm := (inp.B.CarbonIntensity - 100.0) / 600.0
82
+ // Provide a baseline positive score, reduced by carbon footprint
83
+ rc.CarbonReward = 0.5 - (inp.EnergyKWh * carbonNorm * 0.3)
84
  }
85
 
86
  // ── Aggregate ────────────────────────────────────────────────────────────
 
97
  // Gaussian-shaped bonus: maximum at setpoint, degrades toward bounds
98
  deviation := math.Abs(temp - setpoint)
99
  sigma := (tMax - tMin) / 4.0
100
+ return math.Exp(-0.5*(deviation/sigma)*(deviation/sigma)) * 1.5 // Increased positive reward
101
  }
102
  // Outside bounds: proportional penalty
103
  excess := math.Max(temp-tMax, tMin-temp)
104
+ return -excess * 0.6
105
  }
106
 
107
  // computeGridResponse returns a bonus for shedding load during high grid stress,