Spaces:
Sleeping
Sleeping
feat: define GridMind-RL environment data models and task structures
Browse files- env/environment.go +9 -8
- env/models.go +9 -8
- env/tasks.go +146 -0
- main.go +1 -0
env/environment.go
CHANGED
|
@@ -250,14 +250,15 @@ func (e *Environment) GetState() StateResponse {
|
|
| 250 |
}
|
| 251 |
|
| 252 |
return StateResponse{
|
| 253 |
-
Buildings:
|
| 254 |
-
PriceCurve:
|
| 255 |
-
CarbonCurve:
|
| 256 |
-
Episode:
|
| 257 |
-
Step:
|
| 258 |
-
TaskID:
|
| 259 |
-
Done:
|
| 260 |
-
Seed:
|
|
|
|
| 261 |
}
|
| 262 |
}
|
| 263 |
|
|
|
|
| 250 |
}
|
| 251 |
|
| 252 |
return StateResponse{
|
| 253 |
+
Buildings: buildings,
|
| 254 |
+
PriceCurve: priceCurve,
|
| 255 |
+
CarbonCurve: carbonCurve,
|
| 256 |
+
Episode: e.episode,
|
| 257 |
+
Step: e.step,
|
| 258 |
+
TaskID: e.taskID,
|
| 259 |
+
Done: e.done,
|
| 260 |
+
Seed: e.seed,
|
| 261 |
+
InstructionCard: e.InstructionCard,
|
| 262 |
}
|
| 263 |
}
|
| 264 |
|
env/models.go
CHANGED
|
@@ -141,14 +141,15 @@ type ResetResponse struct {
|
|
| 141 |
|
| 142 |
// StateResponse is returned from GET /state.
|
| 143 |
type StateResponse struct {
|
| 144 |
-
Buildings
|
| 145 |
-
PriceCurve
|
| 146 |
-
CarbonCurve
|
| 147 |
-
Episode
|
| 148 |
-
Step
|
| 149 |
-
TaskID
|
| 150 |
-
Done
|
| 151 |
-
Seed
|
|
|
|
| 152 |
}
|
| 153 |
|
| 154 |
// BuildingStatePublic is the dashboard-friendly full state per building.
|
|
|
|
| 141 |
|
| 142 |
// StateResponse is returned from GET /state.
|
| 143 |
type StateResponse struct {
|
| 144 |
+
Buildings []BuildingStatePublic `json:"buildings"`
|
| 145 |
+
PriceCurve []float64 `json:"price_curve_episode"` // full episode ToU prices
|
| 146 |
+
CarbonCurve []float64 `json:"carbon_curve_episode"` // full episode carbon intensities
|
| 147 |
+
Episode int `json:"episode"`
|
| 148 |
+
Step int `json:"step"`
|
| 149 |
+
TaskID int `json:"task_id"`
|
| 150 |
+
Done bool `json:"done"`
|
| 151 |
+
Seed int64 `json:"seed"`
|
| 152 |
+
InstructionCard *InstructionCard `json:"instruction_card,omitempty"` // Task 4 only
|
| 153 |
}
|
| 154 |
|
| 155 |
// BuildingStatePublic is the dashboard-friendly full state per building.
|
env/tasks.go
CHANGED
|
@@ -167,6 +167,7 @@ type GradeEpisodeInput struct {
|
|
| 167 |
TMin float64
|
| 168 |
TMax float64
|
| 169 |
ExploitPenalties []float64
|
|
|
|
| 170 |
}
|
| 171 |
|
| 172 |
// GradeEpisode computes a deterministic 0.0–1.0 score for a completed episode.
|
|
@@ -185,6 +186,8 @@ func GradeEpisode(inp GradeEpisodeInput) EpisodeGrade {
|
|
| 185 |
grade = gradeTask2(inp, grade)
|
| 186 |
case 3:
|
| 187 |
grade = gradeTask3(inp, grade)
|
|
|
|
|
|
|
| 188 |
default:
|
| 189 |
grade = gradeTask1(inp, grade)
|
| 190 |
}
|
|
@@ -346,3 +349,146 @@ func gradeTask3(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
|
|
| 346 |
grade.Details["baseline_carbon"] = baselineCarbon
|
| 347 |
return grade
|
| 348 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
TMin float64
|
| 168 |
TMax float64
|
| 169 |
ExploitPenalties []float64
|
| 170 |
+
InstructionCard *InstructionCard // set for Task 4 episodes
|
| 171 |
}
|
| 172 |
|
| 173 |
// GradeEpisode computes a deterministic 0.0–1.0 score for a completed episode.
|
|
|
|
| 186 |
grade = gradeTask2(inp, grade)
|
| 187 |
case 3:
|
| 188 |
grade = gradeTask3(inp, grade)
|
| 189 |
+
case 4:
|
| 190 |
+
grade = gradeTask4(inp, grade)
|
| 191 |
default:
|
| 192 |
grade = gradeTask1(inp, grade)
|
| 193 |
}
|
|
|
|
| 349 |
grade.Details["baseline_carbon"] = baselineCarbon
|
| 350 |
return grade
|
| 351 |
}
|
| 352 |
+
|
| 353 |
+
// ── Task 4: Instruction-Following Operator ───────────────────────────────────
|
| 354 |
+
|
| 355 |
+
// gradeTask4 evaluates how well the agent satisfied the natural-language
|
| 356 |
+
// instruction card issued at reset. It reads the InstructionCard from Building 0,
|
| 357 |
+
// checks each target that appears in the card, and computes a weighted score.
|
| 358 |
+
// Falls back to Task 3 grading when no instruction card is available.
|
| 359 |
+
func gradeTask4(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
|
| 360 |
+
// Require an instruction card — passed from the environment at grade time
|
| 361 |
+
if inp.InstructionCard == nil {
|
| 362 |
+
// Fallback: grade as Task 3 (no card to evaluate)
|
| 363 |
+
return gradeTask3(inp, grade)
|
| 364 |
+
}
|
| 365 |
+
|
| 366 |
+
card := inp.InstructionCard
|
| 367 |
+
weights := card.Weights
|
| 368 |
+
targets := card.Targets
|
| 369 |
+
|
| 370 |
+
// Always compute base sub-scores — reuse existing graders
|
| 371 |
+
base := gradeTask3(inp, EpisodeGrade{
|
| 372 |
+
TaskID: inp.TaskID,
|
| 373 |
+
SubScores: map[string]float64{},
|
| 374 |
+
Details: map[string]interface{}{},
|
| 375 |
+
})
|
| 376 |
+
costScore := base.SubScores["cost"]
|
| 377 |
+
tempScore := base.SubScores["temperature"]
|
| 378 |
+
gridScore := base.SubScores["grid_response"]
|
| 379 |
+
carbonScore := base.SubScores["carbon"]
|
| 380 |
+
|
| 381 |
+
// ── Card-specific KPI checks ─────────────────────────────────────────────
|
| 382 |
+
|
| 383 |
+
// KPI 1: Cost cap — did the agent stay under max_cost?
|
| 384 |
+
taskCompletionScore := 0.5 // default partial credit
|
| 385 |
+
if maxCost, ok := targets["max_cost"]; ok && maxCost > 0 {
|
| 386 |
+
agentCost := 0.0
|
| 387 |
+
for _, b := range inp.Buildings {
|
| 388 |
+
agentCost += b.CumulativeCost
|
| 389 |
+
}
|
| 390 |
+
if agentCost <= maxCost {
|
| 391 |
+
taskCompletionScore = 1.0
|
| 392 |
+
} else {
|
| 393 |
+
// Partial credit: how close were they?
|
| 394 |
+
taskCompletionScore = math.Max(0, 1.0-(agentCost-maxCost)/maxCost)
|
| 395 |
+
}
|
| 396 |
+
grade.Details["target_max_cost"] = maxCost
|
| 397 |
+
grade.Details["actual_cost"] = agentCost
|
| 398 |
+
}
|
| 399 |
+
|
| 400 |
+
// KPI 2: Temperature bounds — never violated t_min / t_max
|
| 401 |
+
if tMin, hasTMin := targets["t_min"]; hasTMin {
|
| 402 |
+
tMax, hasTMax := targets["t_max"]
|
| 403 |
+
if hasTMax {
|
| 404 |
+
totalSteps := 0
|
| 405 |
+
withinBounds := 0
|
| 406 |
+
for _, history := range inp.TempHistory {
|
| 407 |
+
for _, temp := range history {
|
| 408 |
+
totalSteps++
|
| 409 |
+
if temp >= tMin && temp <= tMax {
|
| 410 |
+
withinBounds++
|
| 411 |
+
}
|
| 412 |
+
}
|
| 413 |
+
}
|
| 414 |
+
if totalSteps > 0 {
|
| 415 |
+
adherence := float64(withinBounds) / float64(totalSteps)
|
| 416 |
+
// Strict: full credit only if ALWAYS within bounds
|
| 417 |
+
taskCompletionScore = adherence
|
| 418 |
+
}
|
| 419 |
+
grade.Details["target_t_min"] = tMin
|
| 420 |
+
grade.Details["target_t_max"] = tMax
|
| 421 |
+
}
|
| 422 |
+
}
|
| 423 |
+
|
| 424 |
+
// KPI 3: Grid response SLA — shed >= min_shed_fraction when stress > 0.7
|
| 425 |
+
if minShed, ok := targets["min_shed_fraction"]; ok {
|
| 426 |
+
stressSteps := 0
|
| 427 |
+
compliantSteps := 0
|
| 428 |
+
for _, entry := range inp.Replay {
|
| 429 |
+
if entry.Observation.GridStressSignal > 0.7 {
|
| 430 |
+
stressSteps++
|
| 431 |
+
if entry.Action.LoadShedFraction >= minShed {
|
| 432 |
+
compliantSteps++
|
| 433 |
+
}
|
| 434 |
+
}
|
| 435 |
+
}
|
| 436 |
+
if stressSteps > 0 {
|
| 437 |
+
taskCompletionScore = float64(compliantSteps) / float64(stressSteps)
|
| 438 |
+
}
|
| 439 |
+
grade.Details["target_min_shed"] = minShed
|
| 440 |
+
grade.Details["stress_steps"] = stressSteps
|
| 441 |
+
grade.Details["compliant_steps"] = compliantSteps
|
| 442 |
+
}
|
| 443 |
+
|
| 444 |
+
// KPI 4: Carbon reduction — did agent beat baseline by carbon_reduction target?
|
| 445 |
+
if carbonTarget, ok := targets["carbon_reduction"]; ok {
|
| 446 |
+
agentCarbon := 0.0
|
| 447 |
+
baselineCarbon := 0.0
|
| 448 |
+
for _, b := range inp.Buildings {
|
| 449 |
+
agentCarbon += b.CumulativeCarbon
|
| 450 |
+
baselineCarbon += b.BaselineCarbon
|
| 451 |
+
}
|
| 452 |
+
if baselineCarbon > 0 {
|
| 453 |
+
actualReduction := 1.0 - agentCarbon/baselineCarbon
|
| 454 |
+
if actualReduction >= carbonTarget {
|
| 455 |
+
taskCompletionScore = 1.0
|
| 456 |
+
} else {
|
| 457 |
+
taskCompletionScore = math.Max(0, actualReduction/carbonTarget)
|
| 458 |
+
}
|
| 459 |
+
}
|
| 460 |
+
grade.Details["target_carbon_reduction"] = carbonTarget
|
| 461 |
+
}
|
| 462 |
+
|
| 463 |
+
// ── Weighted final score ─────────────────────────────────────────────────
|
| 464 |
+
// Use weights from the card; fall back to Task 4 defaults if missing
|
| 465 |
+
wTask := getWeight(weights, "task_completion", 0.50)
|
| 466 |
+
wCost := getWeight(weights, "cost", 0.20)
|
| 467 |
+
wTemp := getWeight(weights, "temperature", 0.20)
|
| 468 |
+
wGrid := getWeight(weights, "grid_response", 0.05)
|
| 469 |
+
wCarbon := getWeight(weights, "carbon", 0.05)
|
| 470 |
+
|
| 471 |
+
finalScore := taskCompletionScore*wTask +
|
| 472 |
+
costScore*wCost +
|
| 473 |
+
tempScore*wTemp +
|
| 474 |
+
gridScore*wGrid +
|
| 475 |
+
carbonScore*wCarbon
|
| 476 |
+
|
| 477 |
+
grade.SubScores["task_completion"] = clampOpenInterval(math.Round(taskCompletionScore*10000) / 10000)
|
| 478 |
+
grade.SubScores["cost"] = clampOpenInterval(math.Round(costScore*10000) / 10000)
|
| 479 |
+
grade.SubScores["temperature"] = clampOpenInterval(math.Round(tempScore*10000) / 10000)
|
| 480 |
+
grade.SubScores["grid_response"] = clampOpenInterval(math.Round(gridScore*10000) / 10000)
|
| 481 |
+
grade.SubScores["carbon"] = clampOpenInterval(math.Round(carbonScore*10000) / 10000)
|
| 482 |
+
grade.Score = clampOpenInterval(math.Round(finalScore*10000) / 10000)
|
| 483 |
+
|
| 484 |
+
grade.Details["instruction_card_text"] = card.Text
|
| 485 |
+
return grade
|
| 486 |
+
}
|
| 487 |
+
|
| 488 |
+
// getWeight looks up key in weights and returns the stored value. When the key
// is absent, including when weights is nil, defaultVal is returned instead. A
// weight stored explicitly as zero is returned as zero.
func getWeight(weights map[string]float64, key string, defaultVal float64) float64 {
	w, found := weights[key]
	if !found {
		return defaultVal
	}
	return w
}
|
main.go
CHANGED
|
@@ -382,6 +382,7 @@ func (s *Server) handleGrade(w http.ResponseWriter, r *http.Request) {
|
|
| 382 |
TMin: env.TMinDefault,
|
| 383 |
TMax: env.TMaxDefault,
|
| 384 |
ExploitPenalties: penalties,
|
|
|
|
| 385 |
})
|
| 386 |
|
| 387 |
w.Header().Set("Content-Type", "application/json")
|
|
|
|
| 382 |
TMin: env.TMinDefault,
|
| 383 |
TMax: env.TMaxDefault,
|
| 384 |
ExploitPenalties: penalties,
|
| 385 |
+
InstructionCard: state.InstructionCard,
|
| 386 |
})
|
| 387 |
|
| 388 |
w.Header().Set("Content-Type", "application/json")
|