| |
| package env |
|
|
| import "math" |
|
|
| |
| |
// clampOpenInterval forces score into the open interval (0, 1).
//
// Values at or below 0 become epsilon (1e-6), values at or above 1 become
// 1 - epsilon, and anything strictly between the bounds is returned unchanged.
// NaN compares false against both bounds and would otherwise be returned
// as-is, so it is mapped to epsilon, the lowest score this function produces.
func clampOpenInterval(score float64) float64 {
	const epsilon = 1e-6
	switch {
	case math.IsNaN(score), score <= 0.0:
		return epsilon
	case score >= 1.0:
		return 1.0 - epsilon
	default:
		return score
	}
}
|
|
| |
// TaskConfig describes a single task and defines its JSON representation.
type TaskConfig struct {
	// ID is the numeric task identifier, serialized as "id".
	ID int `json:"id"`

	// Name is the short task title, serialized as "name".
	Name string `json:"name"`

	// Description is the longer explanation of the task's objective,
	// serialized as "description".
	Description string `json:"description"`

	// Difficulty is a free-form difficulty label such as "easy",
	// serialized as "difficulty".
	Difficulty string `json:"difficulty"`

	// Weights holds one weight per score component, keyed by component name
	// (for example "cost"), serialized as "weights".
	Weights map[string]float64 `json:"weights"`
}
|
|
| |
| func AllTasks() []TaskConfig { |
| return []TaskConfig{ |
| { |
| ID: 1, |
| Name: "Cost Minimization", |
| Description: "Minimize total energy cost over a 24-hour episode with no process constraints. Beat the always-on flat policy baseline.", |
| Difficulty: "easy", |
| Weights: map[string]float64{"cost": 1.0}, |
| }, |
| { |
| ID: 2, |
| Name: "Constrained Temperature Management", |
| Description: "Minimize cost while keeping indoor temperature within ±2°C of setpoint at all times.", |
| Difficulty: "medium", |
| Weights: map[string]float64{"cost": 0.6, "temperature": 0.4}, |
| }, |
| { |
| ID: 3, |
| Name: "Full Demand-Response with Batch Scheduling", |
| Description: "Minimize cost, maintain temperature, respond to grid stress events, schedule all batch jobs before their deadlines, and minimize carbon emissions.", |
| Difficulty: "hard", |
| Weights: map[string]float64{"cost": 0.28, "temperature": 0.20, "grid_response": 0.20, "batch_deadline": 0.12, "carbon": 0.20}, |
| }, |
| } |
| } |
|
|
| |
| type GradeEpisodeInput struct { |
| TaskID int |
| Buildings []*BuildingState |
| Replay []ReplayEntry |
| TempHistory [][]float64 |
| TMin float64 |
| TMax float64 |
| ExploitPenalties []float64 |
| } |
|
|
| |
| |
| func GradeEpisode(inp GradeEpisodeInput) EpisodeGrade { |
| grade := EpisodeGrade{ |
| TaskID: inp.TaskID, |
| SubScores: map[string]float64{}, |
| Details: map[string]interface{}{}, |
| } |
|
|
| switch inp.TaskID { |
| case 1: |
| grade = gradeTask1(inp, grade) |
| case 2: |
| grade = gradeTask2(inp, grade) |
| case 3: |
| grade = gradeTask3(inp, grade) |
| default: |
| grade = gradeTask1(inp, grade) |
| } |
|
|
| |
| totalPenalty := 0.0 |
| for i, b := range inp.Buildings { |
| _ = b |
| if i < len(inp.ExploitPenalties) { |
| totalPenalty += inp.ExploitPenalties[i] |
| } |
| } |
| if totalPenalty > 0 { |
| grade.ExploitDetected = true |
| grade.PenaltyApplied = math.Min(totalPenalty, 0.3) |
| grade.Score = math.Max(0, grade.Score-grade.PenaltyApplied) |
| } |
|
|
| |
| grade.Score = clampOpenInterval(math.Round(grade.Score*10000) / 10000) |
|
|
| |
| for key, val := range grade.SubScores { |
| grade.SubScores[key] = clampOpenInterval(math.Round(val*10000) / 10000) |
| } |
| return grade |
| } |
|
|
| |
|
|
| func gradeTask1(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade { |
| agentCost := 0.0 |
| baselineCost := 0.0 |
| for _, b := range inp.Buildings { |
| agentCost += b.CumulativeCost |
| baselineCost += b.BaselineCost |
| } |
|
|
| var costScore float64 |
| if baselineCost > 0 { |
| |
| |
| ratio := agentCost / baselineCost |
| costScore = math.Max(0, 1.0-ratio) |
| } |
|
|
| |
| clamped := clampOpenInterval(math.Min(1.0, costScore)) |
| grade.SubScores["cost"] = clampOpenInterval(math.Round(clamped*10000) / 10000) |
| grade.Score = grade.SubScores["cost"] |
| grade.Details["agent_cost"] = agentCost |
| grade.Details["baseline_cost"] = baselineCost |
| grade.Details["cost_ratio"] = agentCost / math.Max(baselineCost, 0.01) |
| return grade |
| } |
|
|
| |
|
|
| func gradeTask2(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade { |
| |
| grade = gradeTask1(inp, grade) |
| costScore := grade.SubScores["cost"] |
|
|
| |
| totalSteps := 0 |
| withinBounds := 0 |
| for i, history := range inp.TempHistory { |
| _ = i |
| for _, temp := range history { |
| totalSteps++ |
| if temp >= inp.TMin && temp <= inp.TMax { |
| withinBounds++ |
| } |
| } |
| } |
| constraintScore := 0.0 |
| if totalSteps > 0 { |
| constraintScore = float64(withinBounds) / float64(totalSteps) |
| } |
|
|
| |
| grade.SubScores["cost"] = clampOpenInterval(math.Round(costScore*10000) / 10000) |
| grade.SubScores["temperature"] = clampOpenInterval(math.Round(constraintScore*10000) / 10000) |
| finalScore := costScore*0.6 + constraintScore*0.4 |
| grade.Score = clampOpenInterval(math.Round(finalScore*10000) / 10000) |
| grade.Details["within_bounds_steps"] = withinBounds |
| grade.Details["total_steps"] = totalSteps |
| return grade |
| } |
|
|
| |
|
|
| func gradeTask3(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade { |
| |
| grade = gradeTask2(inp, grade) |
| costScore := grade.SubScores["cost"] |
| tempScore := grade.SubScores["temperature"] |
|
|
| |
| |
| gridStressSteps := 0 |
| gridResponseSteps := 0 |
| for _, entry := range inp.Replay { |
| if entry.Observation.GridStressSignal > 0.7 { |
| gridStressSteps++ |
| if entry.Action.LoadShedFraction > 0.15 { |
| gridResponseSteps++ |
| } |
| } |
| } |
| gridScore := 0.5 |
| if gridStressSteps > 0 { |
| gridScore = float64(gridResponseSteps) / float64(gridStressSteps) |
| } |
|
|
| |
| totalJobs := 0 |
| completedOnTime := 0 |
| for _, b := range inp.Buildings { |
| for _, job := range b.Jobs { |
| totalJobs++ |
| if job.Completed && !job.MissedDeadline { |
| completedOnTime++ |
| } |
| } |
| } |
| batchScore := 0.0 |
| if totalJobs > 0 { |
| batchScore = float64(completedOnTime) / float64(totalJobs) |
| } |
|
|
| |
| agentCarbon := 0.0 |
| baselineCarbon := 0.0 |
| for _, b := range inp.Buildings { |
| agentCarbon += b.CumulativeCarbon |
| baselineCarbon += b.BaselineCarbon |
| } |
| carbonScore := 0.0 |
| if baselineCarbon > 0 { |
| carbonScore = math.Max(0, 1.0-agentCarbon/baselineCarbon) |
| } |
|
|
| |
| grade.SubScores["cost"] = clampOpenInterval(math.Round(costScore*10000) / 10000) |
| grade.SubScores["temperature"] = clampOpenInterval(math.Round(tempScore*10000) / 10000) |
| grade.SubScores["grid_response"] = clampOpenInterval(math.Round(gridScore*10000) / 10000) |
| grade.SubScores["batch_deadline"] = clampOpenInterval(math.Round(batchScore*10000) / 10000) |
| grade.SubScores["carbon"] = clampOpenInterval(math.Round(math.Min(1.0, carbonScore)*10000) / 10000) |
|
|
| finalScore := costScore*0.28 + tempScore*0.20 + gridScore*0.20 + batchScore*0.12 + carbonScore*0.20 |
| grade.Score = clampOpenInterval(math.Round(finalScore*10000) / 10000) |
|
|
| grade.Details["grid_stress_steps"] = gridStressSteps |
| grade.Details["grid_response_steps"] = gridResponseSteps |
| grade.Details["total_jobs"] = totalJobs |
| grade.Details["completed_on_time"] = completedOnTime |
| grade.Details["agent_carbon"] = agentCarbon |
| grade.Details["baseline_carbon"] = baselineCarbon |
| return grade |
| } |
|
|