"""
GridMind-RL OpenEnv Pydantic models.
These types mirror the Go structs exactly for full schema compliance.
"""
from __future__ import annotations
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field, field_validator
class BatchJob(BaseModel):
    """One batch-job record, as carried in ``BuildingStatePublic.jobs``."""

    # NOTE(review): the per-field notes below are read off the field names
    # only; confirm them against the Go struct this model mirrors.
    id: int  # job identifier
    deadline_slot: int  # presumably the timestep by which the job must finish
    duration: int  # presumably a length in timesteps — TODO confirm unit
    power_draw: float  # presumably kW while running — TODO confirm unit
    scheduled: bool
    scheduled_at: int  # presumably the timestep the job was scheduled for
    completed: bool
    missed_deadline: bool
class ObservationModel(BaseModel):
    """Full observation returned on each step / GET /state."""

    # --- Building state -----------------------------------------------------
    indoor_temperature: float = Field(
        ...,
        description="Current building indoor temperature (°C)",
    )
    thermal_storage_level: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Thermal storage fill level (0–1)",
    )
    process_demand: float = Field(
        ...,
        ge=0.0,
        description="Current process power demand (kW)",
    )

    # --- Grid signals -------------------------------------------------------
    # The price bound is strict (gt), so a price of exactly 0 is rejected.
    current_price: float = Field(
        ...,
        gt=0.0,
        description="Real-time electricity price ($/kWh)",
    )
    grid_stress_signal: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Utility demand-response urgency (0–1)",
    )
    carbon_intensity: float = Field(
        ...,
        ge=0.0,
        description="Grid carbon intensity (gCO2/kWh)",
    )

    # --- Time, queue and bookkeeping ----------------------------------------
    hour_of_day: int = Field(
        ...,
        ge=0,
        le=23,
        description="Current hour of day (0–23)",
    )
    # Optional in the payload: a missing queue becomes an empty list.
    batch_queue: List[int] = Field(
        default_factory=list,
        description="Deadline slots of pending batch jobs",
    )
    cumulative_cost: float = Field(
        ...,
        ge=0.0,
        description="Running energy cost this episode ($)",
    )
    step: int = Field(
        ...,
        ge=0,
        le=95,
        description="Current timestep (0–95); 96 steps = 24h",
    )
    # Optional in the payload: defaults to building 0.
    building_id: int = Field(
        default=0,
        description="Building index in federation",
    )
def _clamp_to_range(value: Any, low: float, high: float) -> Any:
    """Clamp a real number into ``[low, high]``; pass anything else through.

    Non-numeric input is returned unchanged so Pydantic's own type coercion
    decides whether it is acceptable. NaN is also returned unchanged (both
    comparisons below are False for NaN), so the field's ``ge``/``le`` bounds
    still get the chance to reject it instead of it being silently mapped to
    a bound.
    """
    if not isinstance(value, (int, float)):
        return value
    if value < low:
        return low
    if value > high:
        return high
    return value


class ActionModel(BaseModel):
    """Agent action for a single timestep.

    Numeric values outside the allowed range of ``hvac_power_level``,
    ``thermal_charge_rate`` and ``load_shed_fraction`` are clamped to the
    nearest bound rather than rejected. ``batch_job_slot`` is not clamped and
    must already lie within 0-4.
    """

    hvac_power_level: float = Field(..., ge=0.0, le=1.0, description="HVAC fraction of max power (0–1)")
    thermal_charge_rate: float = Field(..., ge=-1.0, le=1.0, description="Storage charge (+) or discharge (-) rate")
    batch_job_slot: int = Field(..., ge=0, le=4, description="Time slot offset for next batch job (0=now, 1–4=defer)")
    load_shed_fraction: float = Field(..., ge=0.0, le=0.5, description="Fraction of non-critical load to shed (0–0.5)")
    # NOTE(review): no lower bound here, although ACTION_SCHEMA advertises
    # "minimum": 0 for building_id — confirm whether negatives should be rejected.
    building_id: int = Field(default=0, description="Building index this action targets")

    # The clamps must run in "before" mode. In the default "after" mode the
    # ge/le bounds on the field are checked first and raise on any
    # out-of-range value, so the clamp would only ever see values that are
    # already in range and could never take effect.

    @field_validator("hvac_power_level", mode="before")
    @classmethod
    def clamp_hvac(cls, v: Any) -> Any:
        """Clamp numeric HVAC power input into [0.0, 1.0]."""
        return _clamp_to_range(v, 0.0, 1.0)

    @field_validator("thermal_charge_rate", mode="before")
    @classmethod
    def clamp_charge(cls, v: Any) -> Any:
        """Clamp numeric charge-rate input into [-1.0, 1.0]."""
        return _clamp_to_range(v, -1.0, 1.0)

    @field_validator("load_shed_fraction", mode="before")
    @classmethod
    def clamp_shed(cls, v: Any) -> Any:
        """Clamp numeric load-shed input into [0.0, 0.5]."""
        return _clamp_to_range(v, 0.0, 0.5)
class RewardComponents(BaseModel):
    """Individual reward signal components."""

    # Every component is a required, unbounded float; the sign conventions
    # are stated only in the descriptions below.
    cost_savings: float = Field(
        ...,
        description="Negative reward for energy cost",
    )
    temp_constraint: float = Field(
        ...,
        description="Positive if temperature within bounds",
    )
    grid_response: float = Field(
        ...,
        description="Bonus for shedding during high grid stress",
    )
    deadline_penalty: float = Field(
        ...,
        description="Negative for missed batch deadlines",
    )
    efficiency_bonus: float = Field(
        ...,
        description="Storage arbitrage bonus",
    )
    stability_penalty: float = Field(
        ...,
        description="Penalty for rapid HVAC oscillation",
    )
    carbon_reward: float = Field(
        ...,
        description="Low-carbon operation bonus",
    )

    # Supplied by the producer of the payload; this model does not recompute it.
    total: float = Field(
        ...,
        description="Weighted sum of all components",
    )
class StepInfo(BaseModel):
    """Auxiliary information returned at each step."""

    # Per-component breakdown of the step reward.
    reward_components: RewardComponents
    # NOTE(review): units are taken from the name suffixes (kWh, gCO2);
    # confirm against the server that produces this payload.
    energy_used_kwh: float
    carbon_emitted_gco2: float
    price_signal: float
    grid_stress: float
    # Both lists are optional and default to empty.
    # Presumably they hold BatchJob ids — TODO confirm.
    batch_completed: List[int] = Field(default_factory=list)
    batch_missed: List[int] = Field(default_factory=list)
    episode: int
    step: int
class StepResponse(BaseModel):
    """Full response from POST /step."""

    observation: ObservationModel  # validated against ObservationModel's bounds
    reward: float
    done: bool
    info: StepInfo  # auxiliary data, including the reward breakdown
class ResetRequest(BaseModel):
    """Request body for POST /reset."""

    # Every field is optional; an empty body selects task 1 with one building
    # and no explicit seed or difficulty override.
    seed: Optional[int] = Field(
        default=None,
        description="Random seed for reproducibility",
    )
    task_id: int = Field(
        default=1,
        ge=1,
        le=3,
        description="Task to run (1=easy, 2=medium, 3=hard)",
    )
    # Free-form string: the allowed values are listed in the description but
    # are not enforced by this model.
    difficulty: Optional[str] = Field(
        default=None,
        description="Override difficulty: easy/medium/hard",
    )
    num_buildings: int = Field(
        default=1,
        ge=1,
        le=3,
        description="Number of buildings in federation",
    )
class ResetResponse(BaseModel):
    """Response from POST /reset."""

    # Presumably one observation per building (see ResetRequest.num_buildings)
    # — TODO confirm against the server.
    observations: List[ObservationModel]
    episode: int
    task_id: int
    # Required here, although ResetRequest.seed is optional.
    seed: int
class BuildingStatePublic(BaseModel):
    """Full building state including history for dashboard rendering."""

    # ObservationModel fields (flattened)
    # Same names as ObservationModel, but declared without its range
    # constraints, and building_id is required here instead of defaulting to 0.
    indoor_temperature: float
    thermal_storage_level: float
    process_demand: float
    current_price: float
    grid_stress_signal: float
    carbon_intensity: float
    hour_of_day: int
    batch_queue: List[int] = Field(default_factory=list)
    cumulative_cost: float
    step: int
    building_id: int
    # Extended state
    outdoor_temperature: float
    setpoint_temperature: float
    baseline_cost: float
    cumulative_carbon: float
    jobs: List[BatchJob] = Field(default_factory=list)
    # History arrays
    # All optional, defaulting to empty lists. Presumably one entry per
    # elapsed timestep — TODO confirm.
    temp_history: List[float] = Field(default_factory=list)
    cost_history: List[float] = Field(default_factory=list)
    hvac_history: List[float] = Field(default_factory=list)
    load_shed_history: List[float] = Field(default_factory=list)
    reward_history: List[RewardComponents] = Field(default_factory=list)
class StateResponse(BaseModel):
    """Full environment state from GET /state."""

    buildings: List[BuildingStatePublic]  # per-building state with histories
    # Presumably the price and carbon-intensity series for the whole episode
    # — TODO confirm length and units against the server.
    price_curve_episode: List[float]
    carbon_curve_episode: List[float]
    episode: int
    step: int
    task_id: int
    done: bool
    seed: int
class TaskConfig(BaseModel):
    """Task configuration."""

    id: int
    name: str
    description: str
    difficulty: str  # free-form string; no allowed values are enforced here
    # Presumably keyed by reward-component name — TODO confirm against the
    # task definitions.
    weights: Dict[str, float]
class EpisodeGrade(BaseModel):
    """Graded episode result."""

    task_id: int
    # Required, and validated to lie in the closed interval [0.0, 1.0].
    score: float = Field(..., ge=0.0, le=1.0)
    sub_scores: Dict[str, float]  # unconstrained, unlike `score`
    exploit_detected: bool
    # NOTE(review): sign and scale are not established in this file — confirm
    # how the grader applies this to `score`.
    penalty_applied: float
    details: Dict[str, Any]  # free-form payload
# ── Action space schema (for LLM prompting) ────────────────────────────────
# JSON-schema description of one action. The numeric bounds match the
# ge/le constraints declared on ActionModel. building_id is the only
# property that is not required.
ACTION_SCHEMA = {
    "type": "object",
    "properties": {
        "hvac_power_level": {
            "type": "number", "minimum": 0.0, "maximum": 1.0,
            "description": "Fraction of max HVAC power (0=off, 1=full power)",
        },
        "thermal_charge_rate": {
            "type": "number", "minimum": -1.0, "maximum": 1.0,
            "description": "Charge (+) or discharge (-) thermal storage at this fraction of max rate",
        },
        "batch_job_slot": {
            "type": "integer", "minimum": 0, "maximum": 4,
            "description": "Schedule next batch job: 0=run now, 1-4=defer by N 15-min intervals",
        },
        "load_shed_fraction": {
            "type": "number", "minimum": 0.0, "maximum": 0.5,
            "description": "Fraction of non-critical load to shed during this step (0=no shedding)",
        },
        "building_id": {
            "type": "integer", "minimum": 0,
            "description": "Which building to apply this action to (0 for single-building mode)",
        },
    },
    "required": [
        "hvac_power_level",
        "thermal_charge_rate",
        "batch_job_slot",
        "load_shed_fraction",
    ],
}
# ── Observation space schema ───────────────────────────────────────────────
# JSON-schema description of one observation. Bounds and descriptions mirror
# the Field(...) declarations on ObservationModel so that both descriptions
# of the same payload agree. current_price uses exclusiveMinimum because the
# model declares it with gt=0.0 (zero itself is not allowed).
OBSERVATION_SCHEMA = {
    "type": "object",
    "properties": {
        "indoor_temperature": {
            "type": "number",
            "description": "Indoor temperature °C",
        },
        "thermal_storage_level": {
            "type": "number",
            "minimum": 0,
            "maximum": 1,
            "description": "Thermal storage fill level (0–1)",
        },
        "process_demand": {
            "type": "number",
            "minimum": 0,
            "description": "Process power demand kW",
        },
        "current_price": {
            "type": "number",
            "exclusiveMinimum": 0,
            "description": "Electricity price $/kWh",
        },
        "grid_stress_signal": {
            "type": "number",
            "minimum": 0,
            "maximum": 1,
            "description": "Utility demand-response urgency (0–1)",
        },
        "carbon_intensity": {
            "type": "number",
            "minimum": 0,
            "description": "Grid carbon intensity gCO2/kWh",
        },
        "hour_of_day": {
            "type": "integer",
            "minimum": 0,
            "maximum": 23,
            "description": "Current hour of day (0–23)",
        },
        "batch_queue": {
            "type": "array",
            "items": {"type": "integer"},
            "description": "Deadline slots of pending batch jobs",
        },
        "cumulative_cost": {
            "type": "number",
            "minimum": 0,
            "description": "Running energy cost this episode ($)",
        },
        "step": {
            "type": "integer",
            "minimum": 0,
            "maximum": 95,
            "description": "Current timestep (0–95); 96 steps = 24h",
        },
        "building_id": {
            "type": "integer",
            "description": "Building index in federation",
        },
    },
}
|