Spaces:

LO-Kyu
/

gridmind

Running

App Files Community

adityss committed 9 days ago

Commit

5569b4d

1 Parent(s): 0361922

feat: implement core environment simulation logic and update baseline scores

Browse files

Files changed (3) hide show

baseline_scores.json +19 -19
env/environment.go +18 -6
env/rewards.go +28 -36

baseline_scores.json CHANGED Viewed

@@ -7,50 +7,50 @@
   "llm_every": 4,
   "max_steps": null,
   "task_averages": {
-    "1": 0.2776,
-    "2": 0.2182,
-    "3": 0.3115
   },
-  "overall_average": 0.2691,
   "all_results": [
     {
       "task_id": 1,
       "seed": 1100,
-      "total_reward": 114.28893759320243,
       "total_steps": 96,
-      "elapsed_sec": 1.3370721340179443,
-      "score": 0.2776,
       "sub_scores": {
-        "cost": 0.277555958007489
       },
       "exploit_detected": false
     },
     {
       "task_id": 2,
       "seed": 1200,
-      "total_reward": -625.6665397814021,
       "total_steps": 96,
-      "elapsed_sec": 1.229602336883545,
-      "score": 0.2182,
       "sub_scores": {
-        "cost": 0.2595566056450961,
-        "temperature": 0.15625
       },
       "exploit_detected": false
     },
     {
       "task_id": 3,
       "seed": 1300,
-      "total_reward": -639.8462871515986,
       "total_steps": 96,
-      "elapsed_sec": 1.1910581588745117,
-      "score": 0.3115,
       "sub_scores": {
         "batch_deadline": 1,
-        "carbon": 0.24377839161166936,
-        "cost": 0.25263438913936676,
         "grid_response": 0.21428571428571427,
-        "temperature": 0.14583333333333334
       },
       "exploit_detected": false
     }

   "llm_every": 4,
   "max_steps": null,
   "task_averages": {
+    "1": 0.7063,
+    "2": 0.6333,
+    "3": 0.5966
   },
+  "overall_average": 0.6454,
   "all_results": [
     {
       "task_id": 1,
       "seed": 1100,
+      "total_reward": 251.40178983938813,
       "total_steps": 96,
+      "elapsed_sec": 0.14183712005615234,
+      "score": 0.7063,
       "sub_scores": {
+        "cost": 0.7063441549865395
       },
       "exploit_detected": false
     },
     {
       "task_id": 2,
       "seed": 1200,
+      "total_reward": 246.40262234598185,
       "total_steps": 96,
+      "elapsed_sec": 0.11959218978881836,
+      "score": 0.6333,
       "sub_scores": {
+        "cost": 0.7014155357169216,
+        "temperature": 0.53125
       },
       "exploit_detected": false
     },
     {
       "task_id": 3,
       "seed": 1300,
+      "total_reward": 255.60231973463087,
       "total_steps": 96,
+      "elapsed_sec": 0.12531447410583496,
+      "score": 0.5966,
       "sub_scores": {
         "batch_deadline": 1,
+        "carbon": 0.6574530318382599,
+        "cost": 0.670084941969173,
         "grid_response": 0.21428571428571427,
+        "temperature": 0.5729166666666666
       },
       "exploit_detected": false
     }

env/environment.go CHANGED Viewed

@@ -418,18 +418,30 @@ func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) S
 	batchCompleted, batchMissed := e.updateBatchJobs(b, act.BatchJobSlot, s)
 	// ----- Thermal dynamics -----
-	// Simple first-order thermal model:
-	// ΔT per step = (HVAC effect + outdoor infiltration + storage discharge effect - process demand)
-	hvacEffect := (act.HVACPowerLevel - 0.5) * 2.0 * 1.5 // ±3°C max swing per step
 	infiltration := (b.OutdoorTemperature - b.IndoorTemperature) * 0.03
 	storageEffect := 0.0
-	if act.ThermalChargeRate < 0 { // discharging storage = provides cooling/heating
-		storageEffect = math.Abs(act.ThermalChargeRate) * 0.5
 	}
 	processHeat := b.ProcessDemand * 0.002 // kW→°C rough factor
-	deltaT := hvacEffect + infiltration + storageEffect - processHeat
 	b.IndoorTemperature += deltaT
 	// ----- Energy & cost accounting -----
 	batchPowerDraw := e.batchRunningPower(b)
 	totalKW := hvacPower + math.Max(0, chargeKW) + batchPowerDraw - shedKW

 	batchCompleted, batchMissed := e.updateBatchJobs(b, act.BatchJobSlot, s)
 	// ----- Thermal dynamics -----
+	// First-order setpoint-driven model:
+	// HVAC drives temperature toward setpoint; higher power = stronger effect.
+	// At HVACPowerLevel=1.0, HVAC strongly pushes toward setpoint.
+	// At HVACPowerLevel=0.0, HVAC is off — temp drifts with environment.
+	hvacEffect := (b.SetpointTemperature - b.IndoorTemperature) * act.HVACPowerLevel * 0.15
+	// Outdoor infiltration: building slowly equilibrates with outside
 	infiltration := (b.OutdoorTemperature - b.IndoorTemperature) * 0.03
+	// Thermal storage discharge provides supplemental conditioning toward setpoint
 	storageEffect := 0.0
+	if act.ThermalChargeRate < 0 {
+		storageEffect = (b.SetpointTemperature - b.IndoorTemperature) * math.Abs(act.ThermalChargeRate) * 0.05
 	}
+	// Process equipment waste heat (always warms the building)
 	processHeat := b.ProcessDemand * 0.002 // kW→°C rough factor
+	deltaT := hvacEffect + infiltration + storageEffect + processHeat
 	b.IndoorTemperature += deltaT
+	// Clamp to physically reasonable indoor range
+	b.IndoorTemperature = math.Max(10.0, math.Min(40.0, b.IndoorTemperature))
 	// ----- Energy & cost accounting -----
 	batchPowerDraw := e.batchRunningPower(b)
 	totalKW := hvacPower + math.Max(0, chargeKW) + batchPowerDraw - shedKW

env/rewards.go CHANGED Viewed

@@ -25,8 +25,8 @@ type ComputeRewardInput struct {
 }
 // ComputeReward returns a dense RewardComponents struct from the current step inputs.
-// The reward is task-aware: task 1 only cares about cost, task 2 adds temperature,
-// task 3 adds grid response, batch deadlines, and carbon.
 func ComputeReward(inp ComputeRewardInput) RewardComponents {
 	rc := RewardComponents{}
@@ -36,38 +36,32 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
 	rc.CostSavings = 1.5 - (inp.StepCost/typicalCost)*2.0
 	// ── 2. Temperature Constraint ────────────────────────────────────────────
-	// Active for task 2 and 3. Gaussian bonus for being near setpoint.
-	if inp.TaskID >= 2 {
-		temp := inp.B.IndoorTemperature
-		rc.TempConstraint = computeTempReward(temp, inp.B.SetpointTemperature, inp.TMin, inp.TMax)
-	}
 	// ── 3. Grid Stress Response ──────────────────────────────────────────────
-	// Active for task 3. Rewards proactive grid awareness, not just reactive shedding.
-	if inp.TaskID >= 3 {
-		rc.GridResponse = computeGridResponse(inp.GridStress, inp.ShedFraction)
-	}
 	// ── 4. Deadline Penalty / Bonus ──────────────────────────────────────────
-	// Task 2+: penalise missed jobs, reward on-track pending jobs.
-	if inp.TaskID >= 2 {
-		if inp.BatchMissed > 0 {
-			rc.DeadlinePenalty = -float64(inp.BatchMissed) * 1.5
-		}
-		// Positive signal: reward for jobs still on track (not missed yet)
-		onTrackJobs := 0
-		for _, job := range inp.B.Jobs {
-			if !job.Completed && !job.MissedDeadline {
-				onTrackJobs++
-			}
-			if job.Completed && !job.MissedDeadline {
-				onTrackJobs++ // completed on time is even better
-			}
 		}
-		if onTrackJobs > 0 && inp.BatchMissed == 0 {
-			rc.DeadlinePenalty += float64(onTrackJobs) * 0.08
 		}
 	}
 	// ── 5. Efficiency Bonus (thermal storage utilization) ─────────────────────
 	// Rewards smart storage use: arbitrage + maintaining useful storage levels.
@@ -100,15 +94,13 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
 	}
 	// ── 7. Carbon Reward ─────────────────────────────────────────────────────
-	// Active for task 3. Rewards low-carbon operation.
-	if inp.TaskID >= 3 {
-		carbonNorm := math.Max(0, (inp.B.CarbonIntensity-100.0)/600.0)
-		// Baseline bonus, reduced by carbon-heavy consumption
-		rc.CarbonReward = 0.6 - (inp.EnergyKWh * carbonNorm * 0.25)
-		// Extra bonus for operating during genuinely clean grid periods
-		if carbonNorm < 0.3 {
-			rc.CarbonReward += 0.15
-		}
 	}
 	// ── Aggregate ────────────────────────────────────────────────────────────

 }
 // ComputeReward returns a dense RewardComponents struct from the current step inputs.
+// All 7 reward components are always computed for rich per-step signal.
+// Task-specific weighting is handled by the GRADING system (tasks.go), not here.
 func ComputeReward(inp ComputeRewardInput) RewardComponents {
 	rc := RewardComponents{}
 	rc.CostSavings = 1.5 - (inp.StepCost/typicalCost)*2.0
 	// ── 2. Temperature Constraint ────────────────────────────────────────────
+	// Gaussian bonus for being near setpoint; penalty outside comfort bounds.
+	temp := inp.B.IndoorTemperature
+	rc.TempConstraint = computeTempReward(temp, inp.B.SetpointTemperature, inp.TMin, inp.TMax)
 	// ── 3. Grid Stress Response ──────────────────────────────────────────────
+	// Rewards proactive grid awareness and demand-response compliance.
+	rc.GridResponse = computeGridResponse(inp.GridStress, inp.ShedFraction)
 	// ── 4. Deadline Penalty / Bonus ──────────────────────────────────────────
+	// Penalise missed batch jobs, reward on-track pending jobs.
+	if inp.BatchMissed > 0 {
+		rc.DeadlinePenalty = -float64(inp.BatchMissed) * 1.5
+	}
+	// Positive signal: reward for jobs still on track (not missed yet)
+	onTrackJobs := 0
+	for _, job := range inp.B.Jobs {
+		if !job.Completed && !job.MissedDeadline {
+			onTrackJobs++
 		}
+		if job.Completed && !job.MissedDeadline {
+			onTrackJobs++ // completed on time is even better
 		}
 	}
+	if onTrackJobs > 0 && inp.BatchMissed == 0 {
+		rc.DeadlinePenalty += float64(onTrackJobs) * 0.08
+	}
 	// ── 5. Efficiency Bonus (thermal storage utilization) ─────────────────────
 	// Rewards smart storage use: arbitrage + maintaining useful storage levels.
 	}
 	// ── 7. Carbon Reward ─────────────────────────────────────────────────────
+	// Rewards low-carbon operation based on grid carbon intensity.
+	carbonNorm := math.Max(0, (inp.B.CarbonIntensity-100.0)/600.0)
+	// Baseline bonus, reduced by carbon-heavy consumption
+	rc.CarbonReward = 0.6 - (inp.EnergyKWh * carbonNorm * 0.25)
+	// Extra bonus for operating during genuinely clean grid periods
+	if carbonNorm < 0.3 {
+		rc.CarbonReward += 0.15
 	}
 	// ── Aggregate ────────────────────────────────────────────────────────────