gridmind / python /models.py
ShreeshantXD's picture
feat: add baseline scores JSON, inference script, and update Dockerfile for improved project structure
6d74982
"""
GridMind-RL OpenEnv Pydantic models.
These types mirror the Go structs exactly for full schema compliance.
"""
from __future__ import annotations
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field, field_validator
class BatchJob(BaseModel):
id: int
deadline_slot: int
duration: int
power_draw: float
scheduled: bool
scheduled_at: int
completed: bool
missed_deadline: bool
class ObservationModel(BaseModel):
"""Full observation returned on each step / GET /state."""
indoor_temperature: float = Field(..., description="Current building indoor temperature (°C)")
thermal_storage_level: float = Field(..., ge=0.0, le=1.0, description="Thermal storage fill level (0–1)")
process_demand: float = Field(..., ge=0.0, description="Current process power demand (kW)")
current_price: float = Field(..., gt=0.0, description="Real-time electricity price ($/kWh)")
grid_stress_signal: float = Field(..., ge=0.0, le=1.0, description="Utility demand-response urgency (0–1)")
carbon_intensity: float = Field(..., ge=0.0, description="Grid carbon intensity (gCO2/kWh)")
hour_of_day: int = Field(..., ge=0, le=23, description="Current hour of day (0–23)")
batch_queue: List[int] = Field(default_factory=list, description="Deadline slots of pending batch jobs")
cumulative_cost: float = Field(..., ge=0.0, description="Running energy cost this episode ($)")
step: int = Field(..., ge=0, le=95, description="Current timestep (0–95); 96 steps = 24h")
building_id: int = Field(default=0, description="Building index in federation")
class ActionModel(BaseModel):
"""Agent action for a single timestep."""
hvac_power_level: float = Field(..., ge=0.0, le=1.0, description="HVAC fraction of max power (0–1)")
thermal_charge_rate: float = Field(..., ge=-1.0, le=1.0, description="Storage charge (+) or discharge (-) rate")
batch_job_slot: int = Field(..., ge=0, le=4, description="Time slot offset for next batch job (0=now, 1–4=defer)")
load_shed_fraction: float = Field(..., ge=0.0, le=0.5, description="Fraction of non-critical load to shed (0–0.5)")
building_id: int = Field(default=0, description="Building index this action targets")
@field_validator("hvac_power_level")
@classmethod
def clamp_hvac(cls, v: float) -> float:
return max(0.0, min(1.0, v))
@field_validator("thermal_charge_rate")
@classmethod
def clamp_charge(cls, v: float) -> float:
return max(-1.0, min(1.0, v))
@field_validator("load_shed_fraction")
@classmethod
def clamp_shed(cls, v: float) -> float:
return max(0.0, min(0.5, v))
class RewardComponents(BaseModel):
"""Individual reward signal components."""
cost_savings: float = Field(..., description="Negative reward for energy cost")
temp_constraint: float = Field(..., description="Positive if temperature within bounds")
grid_response: float = Field(..., description="Bonus for shedding during high grid stress")
deadline_penalty: float = Field(..., description="Negative for missed batch deadlines")
efficiency_bonus: float = Field(..., description="Storage arbitrage bonus")
stability_penalty: float = Field(..., description="Penalty for rapid HVAC oscillation")
carbon_reward: float = Field(..., description="Low-carbon operation bonus")
total: float = Field(..., description="Weighted sum of all components")
class StepInfo(BaseModel):
"""Auxiliary information returned at each step."""
reward_components: RewardComponents
energy_used_kwh: float
carbon_emitted_gco2: float
price_signal: float
grid_stress: float
batch_completed: List[int] = Field(default_factory=list)
batch_missed: List[int] = Field(default_factory=list)
episode: int
step: int
class StepResponse(BaseModel):
"""Full response from POST /step."""
observation: ObservationModel
reward: float
done: bool
info: StepInfo
class ResetRequest(BaseModel):
"""Request body for POST /reset."""
seed: Optional[int] = Field(None, description="Random seed for reproducibility")
task_id: int = Field(1, ge=1, le=3, description="Task to run (1=easy, 2=medium, 3=hard)")
difficulty: Optional[str] = Field(None, description="Override difficulty: easy/medium/hard")
num_buildings: int = Field(1, ge=1, le=3, description="Number of buildings in federation")
class ResetResponse(BaseModel):
"""Response from POST /reset."""
observations: List[ObservationModel]
episode: int
task_id: int
seed: int
class BuildingStatePublic(BaseModel):
"""Full building state including history for dashboard rendering."""
# ObservationModel fields (flattened)
indoor_temperature: float
thermal_storage_level: float
process_demand: float
current_price: float
grid_stress_signal: float
carbon_intensity: float
hour_of_day: int
batch_queue: List[int] = Field(default_factory=list)
cumulative_cost: float
step: int
building_id: int
# Extended state
outdoor_temperature: float
setpoint_temperature: float
baseline_cost: float
cumulative_carbon: float
jobs: List[BatchJob] = Field(default_factory=list)
# History arrays
temp_history: List[float] = Field(default_factory=list)
cost_history: List[float] = Field(default_factory=list)
hvac_history: List[float] = Field(default_factory=list)
load_shed_history: List[float] = Field(default_factory=list)
reward_history: List[RewardComponents] = Field(default_factory=list)
class StateResponse(BaseModel):
"""Full environment state from GET /state."""
buildings: List[BuildingStatePublic]
price_curve_episode: List[float]
carbon_curve_episode: List[float]
episode: int
step: int
task_id: int
done: bool
seed: int
class TaskConfig(BaseModel):
"""Task configuration."""
id: int
name: str
description: str
difficulty: str
weights: Dict[str, float]
class EpisodeGrade(BaseModel):
"""Graded episode result."""
task_id: int
score: float = Field(..., ge=0.0, le=1.0)
sub_scores: Dict[str, float]
exploit_detected: bool
penalty_applied: float
details: Dict[str, Any]
# ── Action space schema (for LLM prompting) ────────────────────────────────
ACTION_SCHEMA = {
"type": "object",
"properties": {
"hvac_power_level": {
"type": "number",
"minimum": 0.0,
"maximum": 1.0,
"description": "Fraction of max HVAC power (0=off, 1=full power)"
},
"thermal_charge_rate": {
"type": "number",
"minimum": -1.0,
"maximum": 1.0,
"description": "Charge (+) or discharge (-) thermal storage at this fraction of max rate"
},
"batch_job_slot": {
"type": "integer",
"minimum": 0,
"maximum": 4,
"description": "Schedule next batch job: 0=run now, 1-4=defer by N 15-min intervals"
},
"load_shed_fraction": {
"type": "number",
"minimum": 0.0,
"maximum": 0.5,
"description": "Fraction of non-critical load to shed during this step (0=no shedding)"
},
"building_id": {
"type": "integer",
"minimum": 0,
"description": "Which building to apply this action to (0 for single-building mode)"
}
},
"required": ["hvac_power_level", "thermal_charge_rate", "batch_job_slot", "load_shed_fraction"]
}
# ── Observation space schema ───────────────────────────────────────────────
OBSERVATION_SCHEMA = {
"type": "object",
"properties": {
"indoor_temperature": {"type": "number", "description": "Indoor temperature °C"},
"thermal_storage_level": {"type": "number", "minimum": 0, "maximum": 1},
"process_demand": {"type": "number", "description": "Process power demand kW"},
"current_price": {"type": "number", "description": "Electricity price $/kWh"},
"grid_stress_signal": {"type": "number", "minimum": 0, "maximum": 1},
"carbon_intensity": {"type": "number", "description": "Grid carbon intensity gCO2/kWh"},
"hour_of_day": {"type": "integer", "minimum": 0, "maximum": 23},
"batch_queue": {"type": "array", "items": {"type": "integer"}},
"cumulative_cost": {"type": "number"},
"step": {"type": "integer"},
"building_id": {"type": "integer"}
}
}