adityss committed on
Commit
c009bc5
·
1 Parent(s): b81683f

feat: define GridMind-RL environment data models and task structures

Browse files
Files changed (4) hide show
  1. env/environment.go +9 -8
  2. env/models.go +9 -8
  3. env/tasks.go +146 -0
  4. main.go +1 -0
env/environment.go CHANGED
@@ -250,14 +250,15 @@ func (e *Environment) GetState() StateResponse {
250
  }
251
 
252
  return StateResponse{
253
- Buildings: buildings,
254
- PriceCurve: priceCurve,
255
- CarbonCurve: carbonCurve,
256
- Episode: e.episode,
257
- Step: e.step,
258
- TaskID: e.taskID,
259
- Done: e.done,
260
- Seed: e.seed,
 
261
  }
262
  }
263
 
 
250
  }
251
 
252
  return StateResponse{
253
+ Buildings: buildings,
254
+ PriceCurve: priceCurve,
255
+ CarbonCurve: carbonCurve,
256
+ Episode: e.episode,
257
+ Step: e.step,
258
+ TaskID: e.taskID,
259
+ Done: e.done,
260
+ Seed: e.seed,
261
+ InstructionCard: e.InstructionCard,
262
  }
263
  }
264
 
env/models.go CHANGED
@@ -141,14 +141,15 @@ type ResetResponse struct {
141
 
142
  // StateResponse is returned from GET /state.
143
  type StateResponse struct {
144
- Buildings []BuildingStatePublic `json:"buildings"`
145
- PriceCurve []float64 `json:"price_curve_episode"` // full episode ToU prices
146
- CarbonCurve []float64 `json:"carbon_curve_episode"` // full episode carbon intensities
147
- Episode int `json:"episode"`
148
- Step int `json:"step"`
149
- TaskID int `json:"task_id"`
150
- Done bool `json:"done"`
151
- Seed int64 `json:"seed"`
 
152
  }
153
 
154
  // BuildingStatePublic is the dashboard-friendly full state per building.
 
141
 
142
  // StateResponse is the JSON payload returned from GET /state.
143
  type StateResponse struct {
144
+ Buildings []BuildingStatePublic `json:"buildings"`
145
+ PriceCurve []float64 `json:"price_curve_episode"` // full episode ToU prices
146
+ CarbonCurve []float64 `json:"carbon_curve_episode"` // full episode carbon intensities
147
+ Episode int `json:"episode"`
148
+ Step int `json:"step"`
149
+ TaskID int `json:"task_id"`
150
+ Done bool `json:"done"`
151
+ Seed int64 `json:"seed"`
152
+ InstructionCard *InstructionCard `json:"instruction_card,omitempty"` // Task 4 only; a nil pointer is omitted from the JSON (omitempty)
153
  }
154
 
155
  // BuildingStatePublic is the dashboard-friendly full state per building.
env/tasks.go CHANGED
@@ -167,6 +167,7 @@ type GradeEpisodeInput struct {
167
  TMin float64
168
  TMax float64
169
  ExploitPenalties []float64
 
170
  }
171
 
172
  // GradeEpisode computes a deterministic 0.0–1.0 score for a completed episode.
@@ -185,6 +186,8 @@ func GradeEpisode(inp GradeEpisodeInput) EpisodeGrade {
185
  grade = gradeTask2(inp, grade)
186
  case 3:
187
  grade = gradeTask3(inp, grade)
 
 
188
  default:
189
  grade = gradeTask1(inp, grade)
190
  }
@@ -346,3 +349,146 @@ func gradeTask3(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
346
  grade.Details["baseline_carbon"] = baselineCarbon
347
  return grade
348
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  TMin float64
168
  TMax float64
169
  ExploitPenalties []float64
170
+ InstructionCard *InstructionCard // set for Task 4 episodes
171
  }
172
 
173
  // GradeEpisode computes a deterministic 0.0–1.0 score for a completed episode.
 
186
  grade = gradeTask2(inp, grade)
187
  case 3:
188
  grade = gradeTask3(inp, grade)
189
+ case 4:
190
+ grade = gradeTask4(inp, grade)
191
  default:
192
  grade = gradeTask1(inp, grade)
193
  }
 
349
  grade.Details["baseline_carbon"] = baselineCarbon
350
  return grade
351
  }
352
+
353
+ // ── Task 4: Instruction-Following Operator ───────────────────────────────────
354
+
355
+ // gradeTask4 evaluates how well the agent satisfied the natural-language
356
+ // instruction card issued at reset. It reads the InstructionCard from Building 0,
357
+ // checks each target that appears in the card, and computes a weighted score.
358
+ // Falls back to Task 3 grading when no instruction card is available.
359
+ func gradeTask4(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
360
+ // Require an instruction card — passed from the environment at grade time
361
+ if inp.InstructionCard == nil {
362
+ // Fallback: grade as Task 3 (no card to evaluate)
363
+ return gradeTask3(inp, grade)
364
+ }
365
+
366
+ card := inp.InstructionCard
367
+ weights := card.Weights
368
+ targets := card.Targets
369
+
370
+ // Always compute base sub-scores — reuse existing graders
371
+ base := gradeTask3(inp, EpisodeGrade{
372
+ TaskID: inp.TaskID,
373
+ SubScores: map[string]float64{},
374
+ Details: map[string]interface{}{},
375
+ })
376
+ costScore := base.SubScores["cost"]
377
+ tempScore := base.SubScores["temperature"]
378
+ gridScore := base.SubScores["grid_response"]
379
+ carbonScore := base.SubScores["carbon"]
380
+
381
+ // ── Card-specific KPI checks ─────────────────────────────────────────────
382
+
383
+ // KPI 1: Cost cap — did the agent stay under max_cost?
384
+ taskCompletionScore := 0.5 // default partial credit
385
+ if maxCost, ok := targets["max_cost"]; ok && maxCost > 0 {
386
+ agentCost := 0.0
387
+ for _, b := range inp.Buildings {
388
+ agentCost += b.CumulativeCost
389
+ }
390
+ if agentCost <= maxCost {
391
+ taskCompletionScore = 1.0
392
+ } else {
393
+ // Partial credit: how close were they?
394
+ taskCompletionScore = math.Max(0, 1.0-(agentCost-maxCost)/maxCost)
395
+ }
396
+ grade.Details["target_max_cost"] = maxCost
397
+ grade.Details["actual_cost"] = agentCost
398
+ }
399
+
400
+ // KPI 2: Temperature bounds — never violated t_min / t_max
401
+ if tMin, hasTMin := targets["t_min"]; hasTMin {
402
+ tMax, hasTMax := targets["t_max"]
403
+ if hasTMax {
404
+ totalSteps := 0
405
+ withinBounds := 0
406
+ for _, history := range inp.TempHistory {
407
+ for _, temp := range history {
408
+ totalSteps++
409
+ if temp >= tMin && temp <= tMax {
410
+ withinBounds++
411
+ }
412
+ }
413
+ }
414
+ if totalSteps > 0 {
415
+ adherence := float64(withinBounds) / float64(totalSteps)
416
+ // Strict: full credit only if ALWAYS within bounds
417
+ taskCompletionScore = adherence
418
+ }
419
+ grade.Details["target_t_min"] = tMin
420
+ grade.Details["target_t_max"] = tMax
421
+ }
422
+ }
423
+
424
+ // KPI 3: Grid response SLA — shed >= min_shed_fraction when stress > 0.7
425
+ if minShed, ok := targets["min_shed_fraction"]; ok {
426
+ stressSteps := 0
427
+ compliantSteps := 0
428
+ for _, entry := range inp.Replay {
429
+ if entry.Observation.GridStressSignal > 0.7 {
430
+ stressSteps++
431
+ if entry.Action.LoadShedFraction >= minShed {
432
+ compliantSteps++
433
+ }
434
+ }
435
+ }
436
+ if stressSteps > 0 {
437
+ taskCompletionScore = float64(compliantSteps) / float64(stressSteps)
438
+ }
439
+ grade.Details["target_min_shed"] = minShed
440
+ grade.Details["stress_steps"] = stressSteps
441
+ grade.Details["compliant_steps"] = compliantSteps
442
+ }
443
+
444
+ // KPI 4: Carbon reduction — did agent beat baseline by carbon_reduction target?
445
+ if carbonTarget, ok := targets["carbon_reduction"]; ok {
446
+ agentCarbon := 0.0
447
+ baselineCarbon := 0.0
448
+ for _, b := range inp.Buildings {
449
+ agentCarbon += b.CumulativeCarbon
450
+ baselineCarbon += b.BaselineCarbon
451
+ }
452
+ if baselineCarbon > 0 {
453
+ actualReduction := 1.0 - agentCarbon/baselineCarbon
454
+ if actualReduction >= carbonTarget {
455
+ taskCompletionScore = 1.0
456
+ } else {
457
+ taskCompletionScore = math.Max(0, actualReduction/carbonTarget)
458
+ }
459
+ }
460
+ grade.Details["target_carbon_reduction"] = carbonTarget
461
+ }
462
+
463
+ // ── Weighted final score ─────────────────────────────────────────────────
464
+ // Use weights from the card; fall back to Task 4 defaults if missing
465
+ wTask := getWeight(weights, "task_completion", 0.50)
466
+ wCost := getWeight(weights, "cost", 0.20)
467
+ wTemp := getWeight(weights, "temperature", 0.20)
468
+ wGrid := getWeight(weights, "grid_response", 0.05)
469
+ wCarbon := getWeight(weights, "carbon", 0.05)
470
+
471
+ finalScore := taskCompletionScore*wTask +
472
+ costScore*wCost +
473
+ tempScore*wTemp +
474
+ gridScore*wGrid +
475
+ carbonScore*wCarbon
476
+
477
+ grade.SubScores["task_completion"] = clampOpenInterval(math.Round(taskCompletionScore*10000) / 10000)
478
+ grade.SubScores["cost"] = clampOpenInterval(math.Round(costScore*10000) / 10000)
479
+ grade.SubScores["temperature"] = clampOpenInterval(math.Round(tempScore*10000) / 10000)
480
+ grade.SubScores["grid_response"] = clampOpenInterval(math.Round(gridScore*10000) / 10000)
481
+ grade.SubScores["carbon"] = clampOpenInterval(math.Round(carbonScore*10000) / 10000)
482
+ grade.Score = clampOpenInterval(math.Round(finalScore*10000) / 10000)
483
+
484
+ grade.Details["instruction_card_text"] = card.Text
485
+ return grade
486
+ }
487
+
488
// getWeight looks up key in weights and returns the stored value. When the key
// is absent (a nil map has no keys, so this includes a nil weights map) it
// returns defaultVal instead. A stored zero is returned as-is, not replaced.
func getWeight(weights map[string]float64, key string, defaultVal float64) float64 {
	value, found := weights[key]
	if !found {
		return defaultVal
	}
	return value
}
main.go CHANGED
@@ -382,6 +382,7 @@ func (s *Server) handleGrade(w http.ResponseWriter, r *http.Request) {
382
  TMin: env.TMinDefault,
383
  TMax: env.TMaxDefault,
384
  ExploitPenalties: penalties,
 
385
  })
386
 
387
  w.Header().Set("Content-Type", "application/json")
 
382
  TMin: env.TMinDefault,
383
  TMax: env.TMaxDefault,
384
  ExploitPenalties: penalties,
385
+ InstructionCard: state.InstructionCard,
386
  })
387
 
388
  w.Header().Set("Content-Type", "application/json")