Spaces:

Prajwal782007
/

Gridmind

Sleeping

App Files Files Community

Gridmind / env /tasks.go

adityss

feat: define GridMind-RL environment data models and task structures

c009bc5 24 days ago

raw

history blame contribute delete

17.6 kB

	// Package env defines the four GridMind-RL tasks and their deterministic graders.
	package env

	import (
	"fmt"
	"math"
	"math/rand"
	)

	// clampOpenInterval forces score into the open interval (0, 1).
	// Anything at or below 0 becomes epsilon and anything at or above 1 becomes
	// 1-epsilon, so the result always satisfies 0 < score < 1. Values that are
	// already strictly inside the interval are returned unchanged.
	func clampOpenInterval(score float64) float64 {
		const (
			epsilon = 1e-6
			lower   = epsilon
			upper   = 1.0 - epsilon
		)
		switch {
		case score <= 0.0:
			return lower
		case score >= 1.0:
			return upper
		default:
			return score
		}
	}

	// TaskConfig is the static, JSON-serialisable description of one task.
	type TaskConfig struct {
		// ID is the numeric task identifier.
		ID int `json:"id"`
		// Name is the short human-readable title of the task.
		Name string `json:"name"`
		// Description explains the objective in a sentence or two.
		Description string `json:"description"`
		// Difficulty is a free-form label; the tasks in this file use
		// "easy", "medium" and "hard".
		Difficulty string `json:"difficulty"`
		// Weights maps a sub-score name (for example "cost") to its weight.
		Weights map[string]float64 `json:"weights"`
	}

	// AllTasks returns the task configurations in ascending ID order (1 through 4).
	// Every call builds a fresh slice with fresh weight maps, so callers may modify
	// the result freely.
	func AllTasks() []TaskConfig {
		costMinimization := TaskConfig{
			ID:          1,
			Name:        "Cost Minimization",
			Description: "Minimize total energy cost over a 24-hour episode with no process constraints. Beat the always-on flat policy baseline.",
			Difficulty:  "easy",
			Weights:     map[string]float64{"cost": 1.0},
		}

		temperatureManagement := TaskConfig{
			ID:          2,
			Name:        "Constrained Temperature Management",
			Description: "Minimize cost while keeping indoor temperature within ±2°C of setpoint at all times.",
			Difficulty:  "medium",
			Weights:     map[string]float64{"cost": 0.6, "temperature": 0.4},
		}

		demandResponse := TaskConfig{
			ID:          3,
			Name:        "Full Demand-Response with Batch Scheduling",
			Description: "Minimize cost, maintain temperature, respond to grid stress events, schedule all batch jobs before their deadlines, and minimize carbon emissions.",
			Difficulty:  "hard",
			Weights: map[string]float64{
				"cost":           0.28,
				"temperature":    0.20,
				"grid_response":  0.20,
				"batch_deadline": 0.12,
				"carbon":         0.20,
			},
		}

		instructionFollowing := TaskConfig{
			ID:          4,
			Name:        "Instruction-Following Operator",
			Description: "Complete a randomly sampled natural-language objective card. The agent must parse the instruction, plan accordingly, and satisfy all stated KPI targets.",
			Difficulty:  "hard",
			Weights: map[string]float64{
				"task_completion": 0.50,
				"cost":            0.30,
				"temperature":     0.20,
			},
		}

		return []TaskConfig{
			costMinimization,
			temperatureManagement,
			demandResponse,
			instructionFollowing,
		}
	}

	// instructionTemplate is one parameterised blueprint for an instruction card.
	type instructionTemplate struct {
		// makeText renders the human-readable instruction from the sampled
		// numeric parameters, keyed by parameter name.
		makeText func(params map[string]float64) string
		// targets lists the machine-readable target keys the card exposes.
		targets map[string]float64
		// weights holds the scoring weights copied onto the generated card.
		weights map[string]float64
	}

	// GenerateInstructionCard samples a random instruction card for Task 4.
	// The card contains a human-readable text objective plus machine-readable targets.
	//
	// rng must be non-nil. It is consumed in a fixed order (one Intn call to pick
	// the template, then five Float64 draws for the numeric parameters), so the
	// same generator state always produces the same card.
	//
	// The returned card owns freshly allocated Targets and Weights maps. A template
	// target whose preset is 0 is a placeholder and receives the matching
	// randomised parameter; a non-zero preset (the fixed 0.25 shed fraction of the
	// combined template) is kept as written.
	func GenerateInstructionCard(rng *rand.Rand) *InstructionCard {
		// Pool of parameterised templates.
		templates := []instructionTemplate{
			{
				// Template 1: hard energy cap
				makeText: func(p map[string]float64) string {
					return fmt.Sprintf("Keep total energy cost under $%.2f for this 24-hour episode while maintaining comfort.", p["cost_cap"])
				},
				targets: map[string]float64{"max_cost": 0.0}, // placeholder, filled in below
				weights: map[string]float64{"task_completion": 0.5, "cost": 0.3, "temperature": 0.2},
			},
			{
				// Template 2: aggressive temperature constraint
				makeText: func(p map[string]float64) string {
					return fmt.Sprintf("Never allow indoor temperature to exceed %.0f°C or drop below %.0f°C at any point during the episode.", p["t_max"], p["t_min"])
				},
				targets: map[string]float64{"t_min": 0.0, "t_max": 0.0},
				weights: map[string]float64{"task_completion": 0.5, "temperature": 0.4, "cost": 0.1},
			},
			{
				// Template 3: grid response SLA
				makeText: func(p map[string]float64) string {
					return fmt.Sprintf("Respond to all grid stress events (signal > 0.7) by shedding at least %.0f%% of non-critical load.", p["min_shed_pct"]*100)
				},
				targets: map[string]float64{"min_shed_fraction": 0.0},
				weights: map[string]float64{"task_completion": 0.5, "cost": 0.2, "temperature": 0.3},
			},
			{
				// Template 4: carbon reduction
				makeText: func(p map[string]float64) string {
					return fmt.Sprintf("Reduce carbon emissions to at least %.0f%% below the always-on baseline policy.", p["carbon_reduction_pct"]*100)
				},
				targets: map[string]float64{"carbon_reduction": 0.0},
				weights: map[string]float64{"task_completion": 0.5, "cost": 0.2, "temperature": 0.2, "carbon": 0.1},
			},
			{
				// Template 5: combined cost + temperature + grid
				makeText: func(p map[string]float64) string {
					return fmt.Sprintf("Keep energy cost under $%.2f, temperature between %.0f–%.0f°C, and respond to all grid stress events.", p["cost_cap"], p["t_min"], p["t_max"])
				},
				targets: map[string]float64{"max_cost": 0.0, "t_min": 0.0, "t_max": 0.0, "min_shed_fraction": 0.25},
				weights: map[string]float64{"task_completion": 0.5, "cost": 0.2, "temperature": 0.2, "grid_response": 0.1},
			},
		}

		// Pick a random template.
		tmpl := templates[rng.Intn(len(templates))]

		// Randomise numeric parameters. Each draw is its own statement so the
		// order in which the generator is consumed is explicit.
		costCap := 1.5 + rng.Float64()*2.0            // $1.50 – $3.50
		tMin := 18.0 + rng.Float64()*2.0              // 18–20 °C
		tMax := 23.0 + rng.Float64()*2.0              // 23–25 °C
		minShedPct := 0.2 + rng.Float64()*0.2         // 20–40 %
		carbonReductionPct := 0.15 + rng.Float64()*0.2 // 15–35 %
		params := map[string]float64{
			"cost_cap":             costCap,
			"t_min":                tMin,
			"t_max":                tMax,
			"min_shed_pct":         minShedPct,
			"carbon_reduction_pct": carbonReductionPct,
		}

		// Fill targets from params. Only zero-valued placeholders are replaced;
		// an explicit preset in the template is the target the author chose.
		targets := make(map[string]float64, len(tmpl.targets))
		for key, preset := range tmpl.targets {
			if preset != 0 {
				targets[key] = preset
				continue
			}
			switch key {
			case "max_cost":
				targets[key] = params["cost_cap"]
			case "t_min":
				targets[key] = params["t_min"]
			case "t_max":
				targets[key] = params["t_max"]
			case "min_shed_fraction":
				targets[key] = params["min_shed_pct"]
			case "carbon_reduction":
				targets[key] = params["carbon_reduction_pct"]
			}
		}

		// Copy the weights so the card never aliases template state.
		weights := make(map[string]float64, len(tmpl.weights))
		for key, weight := range tmpl.weights {
			weights[key] = weight
		}

		return &InstructionCard{
			Text:    tmpl.makeText(params),
			Targets: targets,
			Weights: weights,
		}
	}

	// GradeEpisodeInput bundles everything the graders read when scoring a
	// finished episode.
	type GradeEpisodeInput struct {
		// TaskID selects which task grader is applied.
		TaskID int
		// Buildings holds the end-of-episode state of each building.
		Buildings []*BuildingState
		// Replay is the recorded sequence of steps for the episode.
		Replay []ReplayEntry
		// TempHistory holds indoor temperatures: per building, per step.
		TempHistory [][]float64
		// TMin and TMax are the inclusive temperature bounds for the episode.
		TMin float64
		TMax float64
		// ExploitPenalties holds one penalty value per building index.
		ExploitPenalties []float64
		// InstructionCard is set for Task 4 episodes.
		InstructionCard *InstructionCard
	}

	// GradeEpisode scores a completed episode and returns the full grade.
	//
	// The grader is chosen by inp.TaskID (2, 3 or 4); any other ID is graded with
	// the Task 1 grader. Exploit penalties are then summed for indices that have a
	// matching building, capped at 0.3 and subtracted from the score. Finally the
	// score and every sub-score are rounded to four decimals and clamped into the
	// open interval (0, 1).
	//
	// The result depends only on inp, so grading is deterministic.
	func GradeEpisode(inp GradeEpisodeInput) EpisodeGrade {
		// Task 1 doubles as the fallback grader for unrecognised task IDs.
		grader := gradeTask1
		switch inp.TaskID {
		case 2:
			grader = gradeTask2
		case 3:
			grader = gradeTask3
		case 4:
			grader = gradeTask4
		}

		grade := grader(inp, EpisodeGrade{
			TaskID:    inp.TaskID,
			SubScores: map[string]float64{},
			Details:   map[string]interface{}{},
		})

		// Exploit detection: only penalties that line up with a building count.
		counted := len(inp.ExploitPenalties)
		if len(inp.Buildings) < counted {
			counted = len(inp.Buildings)
		}
		penaltySum := 0.0
		for _, penalty := range inp.ExploitPenalties[:counted] {
			penaltySum += penalty
		}
		if penaltySum > 0 {
			grade.ExploitDetected = true
			grade.PenaltyApplied = math.Min(penaltySum, 0.3) // deduction capped at 0.3
			grade.Score = math.Max(0, grade.Score-grade.PenaltyApplied)
		}

		// Round first, then clamp, so a value that rounds onto 0 or 1 is still
		// pulled back inside the open interval.
		grade.Score = clampOpenInterval(math.Round(grade.Score*10000) / 10000)
		for name, raw := range grade.SubScores {
			grade.SubScores[name] = clampOpenInterval(math.Round(raw*10000) / 10000)
		}
		return grade
	}

	// ── Task 1: Cost Minimization ───────────────────────────────────────────────

	// gradeTask1 scores cost savings against the baseline policy.
	//
	// Costs are summed over all buildings. The raw score is
	// max(0, 1 - agentCost/baselineCost), or 0 when the baseline total is not
	// positive. It is capped at 1, rounded to four decimals and clamped into
	// (0, 1), then stored as both the "cost" sub-score and the overall score.
	// The totals and their ratio are recorded in grade.Details.
	func gradeTask1(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
		var agentCost, baselineCost float64
		for _, building := range inp.Buildings {
			agentCost += building.CumulativeCost
			baselineCost += building.BaselineCost
		}

		// Without a positive baseline there is nothing to compare against, so
		// the raw saving stays at zero.
		saving := 0.0
		if baselineCost > 0 {
			saving = math.Max(0, 1.0-agentCost/baselineCost)
		}

		capped := clampOpenInterval(math.Min(1.0, saving))
		score := clampOpenInterval(math.Round(capped*10000) / 10000)

		grade.SubScores["cost"] = score
		grade.Score = score
		grade.Details["agent_cost"] = agentCost
		grade.Details["baseline_cost"] = baselineCost
		// The reported ratio floors the denominator so it is always finite.
		grade.Details["cost_ratio"] = agentCost / math.Max(baselineCost, 0.01)
		return grade
	}

	// ── Task 2: Constrained Temperature Management ──────────────────────────────

	func gradeTask2(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
	// Cost sub-score (same as task 1)
	grade = gradeTask1(inp, grade)
	costScore := grade.SubScores["cost"]

	// Temperature constraint sub-score
	totalSteps := 0
	withinBounds := 0
	for i, history := range inp.TempHistory {
	_ = i
	for _, temp := range history {
	totalSteps++
	if temp >= inp.TMin && temp <= inp.TMax {
	withinBounds++
	}
	}
	}
	constraintScore := 0.0
	if totalSteps > 0 {
	constraintScore = float64(withinBounds) / float64(totalSteps)
	}

	// Clamp sub-scores and final score after rounding
	grade.SubScores["cost"] = clampOpenInterval(math.Round(costScore*10000) / 10000)
	grade.SubScores["temperature"] = clampOpenInterval(math.Round(constraintScore*10000) / 10000)
	finalScore := costScore0.6 + constraintScore0.4
	grade.Score = clampOpenInterval(math.Round(finalScore*10000) / 10000)
	grade.Details["within_bounds_steps"] = withinBounds
	grade.Details["total_steps"] = totalSteps
	return grade
	}

	// ── Task 3: Full Demand-Response with Batch Scheduling ──────────────────────

	func gradeTask3(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
	// Reuse task 2 for cost + temperature scores
	grade = gradeTask2(inp, grade)
	costScore := grade.SubScores["cost"]
	tempScore := grade.SubScores["temperature"]

	// Grid response sub-score
	// Count steps where stress > 0.7 and shed_fraction > 0.15
	gridStressSteps := 0
	gridResponseSteps := 0
	for _, entry := range inp.Replay {
	if entry.Observation.GridStressSignal > 0.7 {
	gridStressSteps++
	if entry.Action.LoadShedFraction > 0.15 {
	gridResponseSteps++
	}
	}
	}
	gridScore := 0.5 // default neutral if no stress events
	if gridStressSteps > 0 {
	gridScore = float64(gridResponseSteps) / float64(gridStressSteps)
	}

	// Batch deadline sub-score
	totalJobs := 0
	completedOnTime := 0
	for _, b := range inp.Buildings {
	for _, job := range b.Jobs {
	totalJobs++
	if job.Completed && !job.MissedDeadline {
	completedOnTime++
	}
	}
	}
	batchScore := 0.0
	if totalJobs > 0 {
	batchScore = float64(completedOnTime) / float64(totalJobs)
	}

	// Carbon sub-score vs baseline always-on policy (same spirit as cost)
	agentCarbon := 0.0
	baselineCarbon := 0.0
	for _, b := range inp.Buildings {
	agentCarbon += b.CumulativeCarbon
	baselineCarbon += b.BaselineCarbon
	}
	carbonScore := 0.0
	if baselineCarbon > 0 {
	carbonScore = math.Max(0, 1.0-agentCarbon/baselineCarbon)
	}

	// Clamp all sub-scores after rounding
	grade.SubScores["cost"] = clampOpenInterval(math.Round(costScore*10000) / 10000)
	grade.SubScores["temperature"] = clampOpenInterval(math.Round(tempScore*10000) / 10000)
	grade.SubScores["grid_response"] = clampOpenInterval(math.Round(gridScore*10000) / 10000)
	grade.SubScores["batch_deadline"] = clampOpenInterval(math.Round(batchScore*10000) / 10000)
	grade.SubScores["carbon"] = clampOpenInterval(math.Round(math.Min(1.0, carbonScore)*10000) / 10000)

	finalScore := costScore0.28 + tempScore0.20 + gridScore0.20 + batchScore0.12 + carbonScore*0.20
	grade.Score = clampOpenInterval(math.Round(finalScore*10000) / 10000)

	grade.Details["grid_stress_steps"] = gridStressSteps
	grade.Details["grid_response_steps"] = gridResponseSteps
	grade.Details["total_jobs"] = totalJobs
	grade.Details["completed_on_time"] = completedOnTime
	grade.Details["agent_carbon"] = agentCarbon
	grade.Details["baseline_carbon"] = baselineCarbon
	return grade
	}

	// ── Task 4: Instruction-Following Operator ───────────────────────────────────

	// gradeTask4 evaluates how well the agent satisfied the natural-language
	// instruction card issued for the episode. The card is read from
	// inp.InstructionCard; when it is nil the episode is graded as Task 3.
	//
	// Each KPI named by the card's targets is scored in [0, 1]:
	//   - max_cost (> 0): 1 when total cost is within the cap, otherwise reduced
	//     in proportion to the overshoot.
	//   - t_min and t_max (both present): fraction of temperature samples inside
	//     [t_min, t_max]; skipped when there are no samples.
	//   - min_shed_fraction: fraction of stress steps (signal > 0.7) on which at
	//     least that fraction was shed; skipped when no stress occurred.
	//   - carbon_reduction: 1 when the reduction against the baseline meets the
	//     target, otherwise the achieved share of it; skipped when the baseline
	//     total is not positive.
	//
	// task_completion is the mean of the KPIs that were scored, so every target
	// on a multi-target card counts; it is 0.5 when none could be scored.
	//
	// The final score weights task_completion with the cost, temperature,
	// grid_response and carbon sub-scores from the Task 3 grader. Weights come
	// from the card, and a key the card omits weighs 0; only a card with no
	// weights at all uses the defaults 0.50/0.20/0.20/0.05/0.05. Score and
	// sub-scores are rounded to four decimals and clamped into (0, 1).
	func gradeTask4(inp GradeEpisodeInput, grade EpisodeGrade) EpisodeGrade {
		// Require an instruction card — passed from the environment at grade time.
		if inp.InstructionCard == nil {
			// Fallback: grade as Task 3 (no card to evaluate).
			return gradeTask3(inp, grade)
		}

		card := inp.InstructionCard
		weights := card.Weights
		targets := card.Targets

		// Always compute base sub-scores — reuse existing graders. A scratch
		// grade is used so only the sub-scores are carried over.
		base := gradeTask3(inp, EpisodeGrade{
			TaskID:    inp.TaskID,
			SubScores: map[string]float64{},
			Details:   map[string]interface{}{},
		})
		costScore := base.SubScores["cost"]
		tempScore := base.SubScores["temperature"]
		gridScore := base.SubScores["grid_response"]
		carbonScore := base.SubScores["carbon"]

		// ── Card-specific KPI checks ─────────────────────────────────────────
		// Every KPI that can be scored contributes one entry.
		var kpiScores []float64

		// KPI 1: Cost cap — did the agent stay under max_cost?
		if maxCost, ok := targets["max_cost"]; ok && maxCost > 0 {
			agentCost := 0.0
			for _, b := range inp.Buildings {
				agentCost += b.CumulativeCost
			}
			var kpi float64
			if agentCost <= maxCost {
				kpi = 1.0
			} else {
				// Partial credit: how close were they?
				kpi = math.Max(0, 1.0-(agentCost-maxCost)/maxCost)
			}
			kpiScores = append(kpiScores, kpi)
			grade.Details["target_max_cost"] = maxCost
			grade.Details["actual_cost"] = agentCost
		}

		// KPI 2: Temperature bounds — never violated t_min / t_max.
		if tMin, hasTMin := targets["t_min"]; hasTMin {
			tMax, hasTMax := targets["t_max"]
			if hasTMax {
				totalSteps := 0
				withinBounds := 0
				for _, history := range inp.TempHistory {
					for _, temp := range history {
						totalSteps++
						if temp >= tMin && temp <= tMax {
							withinBounds++
						}
					}
				}
				if totalSteps > 0 {
					// Strict: full credit only if ALWAYS within bounds.
					kpiScores = append(kpiScores, float64(withinBounds)/float64(totalSteps))
				}
				grade.Details["target_t_min"] = tMin
				grade.Details["target_t_max"] = tMax
			}
		}

		// KPI 3: Grid response SLA — shed >= min_shed_fraction when stress > 0.7.
		if minShed, ok := targets["min_shed_fraction"]; ok {
			stressSteps := 0
			compliantSteps := 0
			for _, entry := range inp.Replay {
				if entry.Observation.GridStressSignal > 0.7 {
					stressSteps++
					if entry.Action.LoadShedFraction >= minShed {
						compliantSteps++
					}
				}
			}
			if stressSteps > 0 {
				kpiScores = append(kpiScores, float64(compliantSteps)/float64(stressSteps))
			}
			grade.Details["target_min_shed"] = minShed
			grade.Details["stress_steps"] = stressSteps
			grade.Details["compliant_steps"] = compliantSteps
		}

		// KPI 4: Carbon reduction — did agent beat baseline by the target?
		if carbonTarget, ok := targets["carbon_reduction"]; ok {
			agentCarbon := 0.0
			baselineCarbon := 0.0
			for _, b := range inp.Buildings {
				agentCarbon += b.CumulativeCarbon
				baselineCarbon += b.BaselineCarbon
			}
			if baselineCarbon > 0 {
				actualReduction := 1.0 - agentCarbon/baselineCarbon
				var kpi float64
				if actualReduction >= carbonTarget {
					kpi = 1.0
				} else {
					kpi = math.Max(0, actualReduction/carbonTarget)
				}
				kpiScores = append(kpiScores, kpi)
			}
			grade.Details["target_carbon_reduction"] = carbonTarget
		}

		// Combine the KPIs: mean of everything scored, partial credit otherwise.
		taskCompletionScore := 0.5
		if len(kpiScores) > 0 {
			sum := 0.0
			for _, kpi := range kpiScores {
				sum += kpi
			}
			taskCompletionScore = sum / float64(len(kpiScores))
		}

		// ── Weighted final score ─────────────────────────────────────────────
		// The card's weights define the whole mix, so a key it omits weighs 0.
		// The Task 4 defaults apply only when the card carries no weights.
		useDefaults := len(weights) == 0
		weightFor := func(key string, defaultVal float64) float64 {
			if useDefaults {
				return defaultVal
			}
			return getWeight(weights, key, 0)
		}
		wTask := weightFor("task_completion", 0.50)
		wCost := weightFor("cost", 0.20)
		wTemp := weightFor("temperature", 0.20)
		wGrid := weightFor("grid_response", 0.05)
		wCarbon := weightFor("carbon", 0.05)

		finalScore := taskCompletionScore*wTask +
			costScore*wCost +
			tempScore*wTemp +
			gridScore*wGrid +
			carbonScore*wCarbon

		grade.SubScores["task_completion"] = clampOpenInterval(math.Round(taskCompletionScore*10000) / 10000)
		grade.SubScores["cost"] = clampOpenInterval(math.Round(costScore*10000) / 10000)
		grade.SubScores["temperature"] = clampOpenInterval(math.Round(tempScore*10000) / 10000)
		grade.SubScores["grid_response"] = clampOpenInterval(math.Round(gridScore*10000) / 10000)
		grade.SubScores["carbon"] = clampOpenInterval(math.Round(carbonScore*10000) / 10000)
		grade.Score = clampOpenInterval(math.Round(finalScore*10000) / 10000)

		grade.Details["instruction_card_text"] = card.Text
		return grade
	}

	// getWeight looks up key in weights and returns the stored value, or
	// defaultVal when the key is absent. A nil map is treated as empty.
	func getWeight(weights map[string]float64, key string, defaultVal float64) float64 {
		value, found := weights[key]
		if !found {
			return defaultVal
		}
		return value
	}