adityss committed on
Commit
3b977fc
·
1 Parent(s): 88da572

refactor: replace heuristic log generation with Go-based environment simulation and update API schema

Browse files
.gitignore CHANGED
@@ -77,4 +77,5 @@ wandb/
77
  IMPLEMENTATION_PLAN.md
78
  TODO_SPRINT.md
79
  context.md
80
- REPORT.md
 
 
77
  IMPLEMENTATION_PLAN.md
78
  TODO_SPRINT.md
79
  context.md
80
+ REPORT.md
81
+ gridmind.exe
82
+ gridmind-server.exe
baseline_scores.json CHANGED
@@ -7,17 +7,50 @@
7
  "llm_every": 8,
8
  "max_steps": null,
9
  "task_averages": {
10
- "1": 0.5482
 
 
 
11
  },
12
- "overall_average": 0.5482,
13
  "all_results": [
14
  {
15
  "task_id": 1,
16
  "seed": 1100,
17
- "total_reward": 249.22208122816207,
18
  "total_steps": 96,
19
- "elapsed_sec": 1.4036986827850342,
20
- "score": 0.5482,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "sub_scores": {},
22
  "exploit_detected": false
23
  }
 
7
  "llm_every": 8,
8
  "max_steps": null,
9
  "task_averages": {
10
+ "1": 0.4942,
11
+ "2": 0.4707,
12
+ "3": 0.7478,
13
+ "4": 0.4779
14
  },
15
+ "overall_average": 0.54765,
16
  "all_results": [
17
  {
18
  "task_id": 1,
19
  "seed": 1100,
20
+ "total_reward": 251.84571448658104,
21
  "total_steps": 96,
22
+ "elapsed_sec": 1.227782964706421,
23
+ "score": 0.4942,
24
+ "sub_scores": {},
25
+ "exploit_detected": false
26
+ },
27
+ {
28
+ "task_id": 2,
29
+ "seed": 1200,
30
+ "total_reward": 245.38403598363988,
31
+ "total_steps": 96,
32
+ "elapsed_sec": 0.8327796459197998,
33
+ "score": 0.4707,
34
+ "sub_scores": {},
35
+ "exploit_detected": false
36
+ },
37
+ {
38
+ "task_id": 3,
39
+ "seed": 1300,
40
+ "total_reward": 242.06080137356216,
41
+ "total_steps": 96,
42
+ "elapsed_sec": 0.6833479404449463,
43
+ "score": 0.7478,
44
+ "sub_scores": {},
45
+ "exploit_detected": false
46
+ },
47
+ {
48
+ "task_id": 4,
49
+ "seed": 1400,
50
+ "total_reward": 206.4647897455665,
51
+ "total_steps": 96,
52
+ "elapsed_sec": 1.0237984657287598,
53
+ "score": 0.4779,
54
  "sub_scores": {},
55
  "exploit_detected": false
56
  }
env/environment.go CHANGED
@@ -587,6 +587,7 @@ func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) S
587
  Episode: e.episode,
588
  Step: s,
589
  },
 
590
  }
591
  }
592
 
@@ -692,6 +693,29 @@ func (e *Environment) buildObservation(b *BuildingState) ObservationModel {
692
  // Apply sensor fault noise to observation (not physics) - if sensor fault is active, agent sees wrong temp
693
  reportedTemp := b.IndoorTemperature + b.TempObservationNoise
694
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
695
  return ObservationModel{
696
  IndoorTemperature: math.Round(reportedTemp*100) / 100,
697
  ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000,
@@ -707,6 +731,13 @@ func (e *Environment) buildObservation(b *BuildingState) ObservationModel {
707
  HVACEfficiency: math.Round(b.HVACEfficiency*1000) / 1000,
708
  InstructionCard: e.InstructionCard,
709
  ActiveFaults: activeFaults,
 
 
 
 
 
 
 
710
  }
711
  }
712
 
 
587
  Episode: e.episode,
588
  Step: s,
589
  },
590
+ Rewards: rc,
591
  }
592
  }
593
 
 
693
  // Apply sensor fault noise to observation (not physics) - if sensor fault is active, agent sees wrong temp
694
  reportedTemp := b.IndoorTemperature + b.TempObservationNoise
695
 
696
+ taskCardStr := ""
697
+ if e.taskID == 4 && e.InstructionCard != nil {
698
+ taskCardStr = e.InstructionCard.Text
699
+ } else if e.taskID == 1 {
700
+ taskCardStr = "Task 1 (Easy - Cost Minimization): Minimize total energy cost over 24 hours. No temperature or batch constraints. Use cheap off-peak periods and thermal storage."
701
+ } else if e.taskID == 2 {
702
+ taskCardStr = "Task 2 (Medium - Temperature Management): Minimize cost AND keep indoor temperature within 19-23°C at all times. Balance comfort vs cost."
703
+ } else if e.taskID == 3 {
704
+ taskCardStr = "Task 3 (Hard - Full Demand Response): Minimize cost, maintain temperature, respond to grid stress (shed when grid_stress_signal > 0.7), schedule batch jobs, minimize carbon."
705
+ } else {
706
+ taskCardStr = "Maintain operations and minimize cost."
707
+ }
708
+
709
+ priceForecast := make([]float64, 4)
710
+ for i := 0; i < 4; i++ {
711
+ idx := b.Step + i
712
+ if idx < EpisodeSteps {
713
+ priceForecast[i] = math.Round(e.PriceCurve[idx]*10000) / 10000
714
+ } else {
715
+ priceForecast[i] = math.Round(e.PriceCurve[EpisodeSteps-1]*10000) / 10000
716
+ }
717
+ }
718
+
719
  return ObservationModel{
720
  IndoorTemperature: math.Round(reportedTemp*100) / 100,
721
  ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000,
 
731
  HVACEfficiency: math.Round(b.HVACEfficiency*1000) / 1000,
732
  InstructionCard: e.InstructionCard,
733
  ActiveFaults: activeFaults,
734
+ TaskCard: taskCardStr,
735
+ NLSummary: "GridMind simulation state.",
736
+ MarketType: "tou",
737
+ Season: "summer",
738
+ PriceVolatility: 0.2,
739
+ PriceForecast: priceForecast,
740
+ DemandChargeActive: false,
741
  }
742
  }
743
 
env/models.go CHANGED
@@ -76,6 +76,13 @@ type ObservationModel struct {
76
  HVACEfficiency float64 `json:"hvac_efficiency"`
77
  InstructionCard *InstructionCard `json:"instruction_card,omitempty"` // populated for Task 4 only
78
  ActiveFaults []string `json:"active_faults,omitempty"` // human-readable alarm strings for active faults
 
 
 
 
 
 
 
79
  }
80
 
81
  // ActionModel is the parsed agent action for a single step.
@@ -87,18 +94,19 @@ type ActionModel struct {
87
  BuildingID int `json:"building_id"` // which building to act on
88
  }
89
 
90
- // RewardComponents holds the individual components of the dense reward signal.
91
  type RewardComponents struct {
92
- CostSavings float64 `json:"cost_savings"` // negative = expensive
93
- TempConstraint float64 `json:"temp_constraint"` // positive = within bounds
94
- GridResponse float64 `json:"grid_response"` // bonus for DR compliance
95
- DeadlinePenalty float64 `json:"deadline_penalty"` // negative for missed jobs
96
- EfficiencyBonus float64 `json:"efficiency_bonus"` // storage arbitrage
97
- StabilityPenalty float64 `json:"stability_penalty"` // HVAC oscillation penalty
98
- CarbonReward float64 `json:"carbon_reward"` // low-carbon bonus
99
- InstructionReward float64 `json:"instruction_reward"` // Task 4: instruction-following score
100
- FaultMitigation float64 `json:"fault_mitigation"` // Track 3: reward for proper fault response
101
- Total float64 `json:"total"`
 
 
102
  }
103
 
104
  // StepResponse is the full HTTP body returned from POST /step.
@@ -107,6 +115,7 @@ type StepResponse struct {
107
  Reward float64 `json:"reward"`
108
  Done bool `json:"done"`
109
  Info StepInfo `json:"info"`
 
110
  }
111
 
112
  // StepInfo carries auxiliary information per step.
 
76
  HVACEfficiency float64 `json:"hvac_efficiency"`
77
  InstructionCard *InstructionCard `json:"instruction_card,omitempty"` // populated for Task 4 only
78
  ActiveFaults []string `json:"active_faults,omitempty"` // human-readable alarm strings for active faults
79
+ TaskCard string `json:"task_card"`
80
+ NLSummary string `json:"nl_summary"`
81
+ MarketType string `json:"market_type"`
82
+ Season string `json:"season"`
83
+ PriceVolatility float64 `json:"price_volatility"`
84
+ PriceForecast []float64 `json:"price_forecast"`
85
+ DemandChargeActive bool `json:"demand_charge_active"`
86
  }
87
 
88
  // ActionModel is the parsed agent action for a single step.
 
94
  BuildingID int `json:"building_id"` // which building to act on
95
  }
96
 
 
97
  type RewardComponents struct {
98
+ CostSavings float64 `json:"cost_savings"`
99
+ TempConstraint float64 `json:"temperature_constraint"`
100
+ GridResponse float64 `json:"grid_response"`
101
+ DeadlinePenalty float64 `json:"deadline_penalty"`
102
+ EfficiencyBonus float64 `json:"efficiency_bonus"`
103
+ StabilityPenalty float64 `json:"stability_penalty"`
104
+ CarbonReward float64 `json:"carbon_reward"`
105
+ InstructionReward float64 `json:"task_satisfaction"`
106
+ FaultMitigation float64 `json:"fault_mitigation"`
107
+ PriceAnticipation float64 `json:"price_anticipation"`
108
+ DemandChargePenalty float64 `json:"demand_charge_penalty"`
109
+ Total float64 `json:"total"`
110
  }
111
 
112
  // StepResponse is the full HTTP body returned from POST /step.
 
115
  Reward float64 `json:"reward"`
116
  Done bool `json:"done"`
117
  Info StepInfo `json:"info"`
118
+ Rewards RewardComponents `json:"rewards"`
119
  }
120
 
121
  // StepInfo carries auxiliary information per step.
generate_realistic_training_log.py DELETED
@@ -1,33 +0,0 @@
1
- #!/usr/bin/env python3
2
- import csv, random, math, os
3
-
4
- random.seed(42)
5
- os.makedirs("results", exist_ok=True)
6
-
7
- rows = []
8
- for step in range(0, 301, 5):
9
- progress = step / 300
10
- base = 0.52 + (0.68 - 0.52) * (1 - math.exp(-3 * progress)) + random.gauss(0, 0.015)
11
- json_valid = min(0.2, 0.15 + random.gauss(0, 0.03))
12
- rows.append({
13
- "step": step,
14
- "loss": max(0.000001, 0.00002 - progress * 0.00001 + random.gauss(0, 0.000005)),
15
- "rewards/reward_json_valid/mean": max(0, min(0.2, json_valid)),
16
- "rewards/reward_json_valid/std": 0.02,
17
- "rewards/reward_env_interaction/mean": max(0.4, min(0.75, base)),
18
- "rewards/reward_env_interaction/std": 0.02,
19
- "rewards/reward/mean": 0.20 + json_valid + max(0.4, min(0.75, base)) * 0.4,
20
- })
21
-
22
- columns = ["step", "loss", "rewards/reward_json_valid/mean", "rewards/reward_json_valid/std",
23
- "rewards/reward_env_interaction/mean", "rewards/reward_env_interaction/std", "rewards/reward/mean"]
24
-
25
- with open("results/training_log.csv", "w", newline="") as f:
26
- writer = csv.DictWriter(f, fieldnames=columns)
27
- writer.writeheader()
28
- writer.writerows(rows)
29
-
30
- print(f"Generated {len(rows)} training steps with realistic learning curve")
31
- print(f"Initial episode score: {rows[0]['rewards/reward_env_interaction/mean']:.3f}")
32
- print(f"Final episode score: {rows[-1]['rewards/reward_env_interaction/mean']:.3f}")
33
- print(f"Improvement: {(rows[-1]['rewards/reward_env_interaction/mean'] - rows[0]['rewards/reward_env_interaction/mean']):.3f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generate_training_log.py DELETED
@@ -1,52 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- GridMind-RL Training Log Generator
4
- Generates a realistic training log CSV from heuristic baseline runs.
5
- """
6
- import csv
7
- import os
8
- import json
9
- import random
10
- import math
11
-
12
- random.seed(42)
13
-
14
- os.makedirs("results", exist_ok=True)
15
-
16
- with open("results/baseline_scores_heuristic.json") as f:
17
- heuristic_data = json.load(f)
18
-
19
- heuristic_by_task = {int(k): v for k, v in heuristic_data["task_averages"].items()}
20
- overall_heuristic = heuristic_data["overall_average"]
21
- llm_baseline = 0.65
22
- target_performance = 0.72
23
-
24
- N_STEPS = 200
25
- NOISE_SCALE = 0.02
26
- IMPROVEMENT_RATE = 0.003
27
-
28
- rows = []
29
- for step in range(0, N_STEPS + 1, 5):
30
- progress = step / N_STEPS
31
- base = overall_heuristic + (target_performance - overall_heuristic) * math.sin(progress * math.pi / 2)
32
- loss = 2.0 - progress * 1.5 + random.gauss(0, 0.1)
33
- reward_valid = 0.3 + random.gauss(0, 0.02)
34
- reward_keys = 0.3 + random.gauss(0, 0.02)
35
- reward_env = base * 0.4 + random.gauss(0, NOISE_SCALE)
36
- rows.append({
37
- "step": step,
38
- "loss": max(0.1, loss),
39
- "reward_valid_json": reward_valid,
40
- "reward_has_required_keys": reward_keys,
41
- "reward_env_interaction": max(0.0, min(0.4, reward_env)),
42
- })
43
-
44
- with open("results/training_log.csv", "w", newline="") as f:
45
- writer = csv.DictWriter(f, fieldnames=["step", "loss", "reward_valid_json", "reward_has_required_keys", "reward_env_interaction"])
46
- writer.writeheader()
47
- writer.writerows(rows)
48
-
49
- print(f"Generated {len(rows)} training steps -> results/training_log.csv")
50
- print(f"Heuristic baseline: {overall_heuristic:.3f}")
51
- print(f"Target performance: {target_performance:.3f}")
52
- print(f"Final reward_env: {rows[-1]['reward_env_interaction']:.3f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
inference.py CHANGED
@@ -39,7 +39,7 @@ except ImportError:
39
  ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
40
  HF_TOKEN = os.getenv("HF_TOKEN") # Mandatory — no default
41
  API_BASE_URL = os.getenv("API_BASE_URL", "https://api-inference.huggingface.co/v1")
42
- MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-7B-Instruct")
43
 
44
  # ── Constants ────────────────────────────────────────────────────────────────
45
  BENCHMARK = "gridmind"
 
39
  ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
40
  HF_TOKEN = os.getenv("HF_TOKEN") # Mandatory — no default
41
  API_BASE_URL = os.getenv("API_BASE_URL", "https://api-inference.huggingface.co/v1")
42
+ MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-1.5B-Instruct")
43
 
44
  # ── Constants ────────────────────────────────────────────────────────────────
45
  BENCHMARK = "gridmind"
main.go CHANGED
@@ -240,15 +240,39 @@ func (s *Server) handleReset(w http.ResponseWriter, r *http.Request) {
240
  http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
241
  return
242
  }
243
- var req env.ResetRequest
244
  if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
245
- // Allow empty body → defaults
246
- req = env.ResetRequest{TaskID: 1}
247
  }
248
- if req.TaskID == 0 {
249
- req.TaskID = 1
 
 
 
 
 
250
  }
251
- resp := s.envMgr.Reset(req)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  w.Header().Set("Content-Type", "application/json")
253
  json.NewEncoder(w).Encode(resp)
254
  }
 
240
  http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
241
  return
242
  }
243
+ var req map[string]interface{}
244
  if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
245
+ req = make(map[string]interface{})
 
246
  }
247
+ taskID := 1
248
+ if t, ok := req["task_id"].(float64); ok {
249
+ taskID = int(t)
250
+ } else if t, ok := req["task_id"].(int64); ok {
251
+ taskID = int(t)
252
+ } else if t, ok := req["task_id"].(int); ok {
253
+ taskID = t
254
  }
255
+ if taskID == 0 {
256
+ taskID = 1
257
+ }
258
+ seed := int64(0)
259
+ if s, ok := req["seed"].(float64); ok {
260
+ seed = int64(s)
261
+ } else if s, ok := req["seed"].(int64); ok {
262
+ seed = s
263
+ }
264
+ numBuildings := 1
265
+ if nb, ok := req["num_buildings"].(float64); ok {
266
+ numBuildings = int(nb)
267
+ }
268
+ resetReq := env.ResetRequest{
269
+ TaskID: taskID,
270
+ NumBuildings: numBuildings,
271
+ }
272
+ if seed > 0 {
273
+ resetReq.Seed = &seed
274
+ }
275
+ resp := s.envMgr.Reset(resetReq)
276
  w.Header().Set("Content-Type", "application/json")
277
  json.NewEncoder(w).Encode(resp)
278
  }
openenv.yaml CHANGED
@@ -77,6 +77,22 @@ schemas:
77
  items:
78
  type: string
79
  description: "Human-readable list of active fault alarm strings. Empty when no faults. Track 3."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  instruction_card:
81
  type: [object, "null"]
82
  description: "Natural language objective card. Only populated when task_id=4. Track 2."
@@ -210,10 +226,14 @@ schemas:
210
  type: number
211
  carbon_reward:
212
  type: number
213
- instruction_reward:
214
  type: number
215
  fault_mitigation:
216
  type: number
 
 
 
 
217
  total:
218
  type: number
219
  energy_used_kwh:
@@ -344,11 +364,11 @@ endpoints:
344
  health:
345
  path: /health
346
  method: GET
347
- description: Health check - returns {"status": "ok", "version": "1.0.0"}
348
  ping:
349
  path: /ping
350
  method: GET
351
- description: Liveness probe - returns {"status": "ok"}
352
  reset:
353
  path: /reset
354
  method: POST
 
77
  items:
78
  type: string
79
  description: "Human-readable list of active fault alarm strings. Empty when no faults. Track 3."
80
+ task_card:
81
+ type: string
82
+ nl_summary:
83
+ type: string
84
+ market_type:
85
+ type: string
86
+ season:
87
+ type: string
88
+ price_volatility:
89
+ type: number
90
+ price_forecast:
91
+ type: array
92
+ items:
93
+ type: number
94
+ demand_charge_active:
95
+ type: boolean
96
  instruction_card:
97
  type: [object, "null"]
98
  description: "Natural language objective card. Only populated when task_id=4. Track 2."
 
226
  type: number
227
  carbon_reward:
228
  type: number
229
+ task_satisfaction:
230
  type: number
231
  fault_mitigation:
232
  type: number
233
+ price_anticipation:
234
+ type: number
235
+ demand_charge_penalty:
236
+ type: number
237
  total:
238
  type: number
239
  energy_used_kwh:
 
364
  health:
365
  path: /health
366
  method: GET
367
+ description: 'Health check - returns {"status": "ok", "version": "1.0.0"}'
368
  ping:
369
  path: /ping
370
  method: GET
371
+ description: 'Liveness probe - returns {"status": "ok"}'
372
  reset:
373
  path: /reset
374
  method: POST