File size: 14,393 Bytes
09ecf23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
"""
OpenEnv typed models: Observation, Action, Reward — V2
"""

from openenv.core.env_server.types import Action, Observation
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field


# ── Zone Observation ──────────────────────────────────────────────────────────

class ZoneObservation(BaseModel):
    """Observation of a single zone, embedded in ``DCObservation.zones``.

    Groups one zone's thermal readings, IT load, fan/cooling state,
    humidity, sensor metadata and a short-term load forecast. The field
    descriptions are runtime strings (pydantic exposes them in the generated
    schema), so they are kept free of mis-encoded characters.
    """

    zone_id: str

    # ── Thermal readings ──────────────────────────────────────────────────────
    cold_aisle_temp_c: float = Field(
        ...,
        description=(
            "Primary cold-aisle temperature sensor reading (°C). "
            "For zones with sensor_confidence < 0.7 this value may be drifted "
            "— it reflects the same faulty sensor as reported_temp_c. "
            "Use hot_aisle_temp_c, supply_air_temp_c, and it_load_kw to "
            "cross-check the true thermal state when sensor_confidence is low."
        ),
    )
    hot_aisle_temp_c: float = Field(
        ..., description="Return-air temperature from server exhausts (°C). Always accurate."
    )
    reported_temp_c: float = Field(
        ...,
        description=(
            "Secondary sensor reading (°C). Typically matches cold_aisle_temp_c; "
            "cross-checking both values plus hot_aisle physics can reveal sensor drift."
        ),
    )
    supply_air_temp_c: float = Field(
        ..., description="Actual delivered supply air temperature (°C)"
    )
    supply_air_temp_setpoint_c: float = Field(
        ..., description="Agent-controlled supply air temperature setpoint [16–26] (°C)"
    )

    # ── Load ──────────────────────────────────────────────────────────────────
    it_load_kw: float = Field(..., description="Current IT equipment power draw (kW)")
    # NOTE(review): the [0-1] range is documented only; no ge/le bound is
    # enforced here, so values outside it still validate.
    it_load_pct: float = Field(
        ..., description="Normalised IT load relative to zone baseline [0-1]"
    )

    # ── Fan / cooling ─────────────────────────────────────────────────────────
    fan_speed_pct: float = Field(..., description="Current fan speed (0-100 %)")
    cooling_capacity_kw: float = Field(
        ..., description="Max cooling capacity at full fan speed (kW)"
    )

    # ── Environment ───────────────────────────────────────────────────────────
    humidity_pct: float = Field(..., description="Relative humidity (%)")

    # ── Sensor metadata ───────────────────────────────────────────────────────
    # Both metadata fields are range-validated by pydantic (ge/le).
    sensor_confidence: float = Field(
        ..., ge=0.0, le=1.0,
        description="Reliability weight of this zone's sensor reading [0.0-1.0]"
    )
    zone_priority: int = Field(
        ..., ge=0, le=2,
        description="Static criticality label: 0=low, 1=medium, 2=critical"
    )

    # ── Forecast ──────────────────────────────────────────────────────────────
    # Optional: defaults to 0.0 when the producer supplies no forecast.
    load_forecast_next_hour: float = Field(
        default=0.0,
        description="Predicted IT load in the next 60 minutes (kW)"
    )


# ── Facility Observation ──────────────────────────────────────────────────────

class DCObservation(Observation):
    """Facility-level observation, extending the OpenEnv ``Observation`` type.

    Combines time encoding, weather, chiller state, power and carbon
    metrics, the current load phase, the per-zone ``ZoneObservation`` list, a
    short history buffer, and event/alert context. Field descriptions are
    runtime strings exposed through the model schema, so mis-encoded symbols
    in them have been repaired.
    """

    step: int

    # ── Time ──────────────────────────────────────────────────────────────────
    timestamp_hour: float = Field(..., description="Hour of day [0–24]")
    timestamp_day_sin: float = Field(
        ..., description="sin(2π × hour/24) — cyclical time encoding"
    )
    timestamp_day_cos: float = Field(
        ..., description="cos(2π × hour/24) — cyclical time encoding"
    )

    # ── Weather ───────────────────────────────────────────────────────────────
    outside_temp_c: float = Field(..., description="Outdoor dry-bulb temperature (°C)")
    wet_bulb_temp_c: float = Field(
        ..., description="Outdoor wet-bulb temperature — determines free-cooling potential (°C)"
    )

    # ── Chiller ───────────────────────────────────────────────────────────────
    chiller_active: bool
    chiller_setpoint_c: float = Field(
        ..., description="Current chilled-water setpoint [6–15] (°C)"
    )
    chiller_cop: float = Field(..., description="Chiller coefficient of performance")
    chiller_fault_detected: bool = Field(
        default=False,
        description="Observable anomaly signal — True when COP has degraded abnormally",
    )
    # Free-form string; the three documented values are not enforced by the type.
    chiller_fault_status: str = Field(
        default="nominal",
        description=(
            "Chiller health status: "
            "'nominal' = fully operational; "
            "'degrading' = COP dropping, still providing partial cooling; "
            "'offline' = chiller has failed and provides zero cooling. "
            "When 'offline', setting chiller_active=true in your action has no effect."
        ),
    )

    # ── Power ─────────────────────────────────────────────────────────────────
    ups_efficiency: float = Field(..., description="UPS efficiency [0–1]")
    current_pue: float = Field(..., description="Real-time Power Usage Effectiveness (1.0 = perfect)")
    free_cooling_potential: float = Field(
        default=0.0,
        description="Fraction of cooling that could be met by free-air economiser [0–1]"
    )

    # ── Carbon ────────────────────────────────────────────────────────────────
    grid_carbon_intensity: str = Field(
        ..., description="Human-readable label: low | medium | high | critical_high"
    )
    # The numeric form is range-validated to [0.0, 1.0] by pydantic.
    carbon_intensity_normalized: float = Field(
        ..., ge=0.0, le=1.0,
        description="Numeric carbon intensity [0.0–1.0] for reward computation"
    )

    # ── Load phase ────────────────────────────────────────────────────────────
    load_curve_phase: str = Field(
        default="idle",
        description="Current phase of the diurnal load curve: ramp_up | peak | ramp_down | idle"
    )

    # ── Zones ─────────────────────────────────────────────────────────────────
    zones: List[ZoneObservation]

    # ── Temporal history buffer ───────────────────────────────────────────────
    # default_factory gives every instance its own list (no shared mutable default).
    history: List[Dict[str, Any]] = Field(
        default_factory=list,
        description=(
            "Last 3 step snapshots. Each entry: "
            "{cold_aisle_temp, hot_aisle_temp, fan_speed_pct, pue} per zone, "
            "flattened as history_t-1, history_t-2, history_t-3"
        )
    )

    # ── Event / context block ─────────────────────────────────────────────────
    sla_violation_streak: int = Field(
        default=0,
        description="Consecutive steps in which any zone was outside the safe temperature band"
    )
    maintenance_active: bool = Field(
        default=False,
        description="True if any zone is currently in a maintenance window"
    )
    maintenance_notes: List[str] = Field(default_factory=list)
    upcoming_events: List[str] = Field(default_factory=list)

    # ── Active alerts (environment-computed, not agent-injected) ──────────────
    active_alerts: List[str] = Field(
        default_factory=list,
        description=(
            "Structured alert strings generated by the environment at each step. "
            "Covers: chiller fault/offline, zone overheating/overcooling, "
            "sensor faults, temperature trends, efficiency hints, SLA streaks. "
            "Agents should act on these before inspecting raw numeric fields."
        ),
    )


# ── Action ────────────────────────────────────────────────────────────────────

class ZoneAdjustment(BaseModel):
    """Requested control settings for one zone, identified by ``zone_id``.

    Both levers are required and range-validated by pydantic: fan speed in
    [0, 100] percent and supply-air setpoint in [16, 26] °C. Out-of-range
    values are rejected at validation time.
    """

    zone_id: str
    fan_speed_pct: float = Field(
        ..., ge=0.0, le=100.0,
        description="Target fan speed for this zone (0–100 %)"
    )
    supply_air_temp_setpoint_c: float = Field(
        ..., ge=16.0, le=26.0,
        description="Target supply air temperature setpoint for this zone [16–26] (°C)"
    )


class DCAction(Action):
    """
    Agent action for one environment step.

    Two control levers per zone (fan speed and supply-air temperature
    setpoint, carried by ``ZoneAdjustment``) plus two facility-level levers
    (chilled-water setpoint and chiller on/off). An optional free-text
    ``reasoning`` field accompanies the action.

    Zone adjustments are submitted together in a single list; omitting a
    zone leaves its current settings unchanged. That rule is applied by the
    environment, not by this model. Every field has a default, so an empty
    action validates.
    """

    # default_factory gives every action its own list (no shared mutable default).
    zone_adjustments: List[ZoneAdjustment] = Field(
        default_factory=list,
        description="Per-zone fan speed and supply air temperature setpoint adjustments"
    )

    # ── Facility-level levers ─────────────────────────────────────────────────
    chiller_setpoint_c: float = Field(
        default=10.0, ge=6.0, le=15.0,
        description="Facility-wide chilled-water setpoint [6–15] (°C)"
    )
    chiller_active: bool = Field(
        default=True,
        description=(
            "Toggle chiller on/off. Turning off saves significant power but "
            "temperatures will rise for 3–5 steps before consequences appear."
        )
    )

    # ── Reasoning (graded in hard task) ──────────────────────────────────────
    reasoning: Optional[str] = Field(
        default=None,
        description=(
            "Agent's explanation of its decision. "
            "Graded in the hard task — coherent crisis reasoning is rewarded; "
            "inconsistency between stated reasoning and actual action is penalised."
        )
    )


# ── Reward breakdown ──────────────────────────────────────────────────────────

class DCReward(BaseModel):
    """Itemised reward for one step.

    ``total`` is required; every component, penalty and bonus defaults to
    0.0, and ``breakdown`` defaults to a fresh empty dict. The sign
    conventions (≤0 for penalties, ≥0 for the bonus) are documented in the
    field descriptions only and are not enforced by validation.
    """

    total: float = Field(..., description="Combined reward for this step")

    # ── Component rewards ─────────────────────────────────────────────────────
    temp_reward: float = Field(
        default=0.0,
        description="R_temp — temperature compliance reward (priority-weighted)"
    )
    pue_reward: float = Field(
        default=0.0,
        description="R_pue — efficiency reward relative to PID baseline"
    )
    carbon_reward: float = Field(
        default=0.0,
        description="R_carbon — penalty for high cooling power during dirty-grid periods (≤0)"
    )

    # ── Penalties ─────────────────────────────────────────────────────────────
    safety_penalty: float = Field(
        default=0.0,
        description="P_safety — hard penalty for temperature safety violations (≤0)"
    )
    roughness_penalty: float = Field(
        default=0.0,
        description="P_roughness — penalty for abrupt action changes (≤0)"
    )

    # ── Bonus ─────────────────────────────────────────────────────────────────
    stability_bonus: float = Field(
        default=0.0,
        description="B_stability — compounding bonus for sustained temperature compliance (≥0)"
    )

    # ── Legacy / extra ────────────────────────────────────────────────────────
    # Kept for backward compatibility with any existing grader consumers
    # NOTE(review): described as an alias, but declared as an independent
    # field — nothing in this model keeps it in sync with safety_penalty.
    temperature_penalty: float = Field(
        default=0.0,
        description="Alias for safety_penalty (legacy field — prefer safety_penalty)"
    )
    humidity_penalty: float = Field(
        default=0.0,
        description="Penalty for humidity violations (≤0)"
    )

    breakdown: Dict[str, Any] = Field(
        default_factory=dict,
        description="Per-zone or per-component reward detail for debugging"
    )


# ── Step / reset results ──────────────────────────────────────────────────────

class StepResult(BaseModel):
    """Bundle of values produced for a single environment step.

    Holds the resulting ``DCObservation``, a scalar reward with its itemised
    ``DCReward`` counterpart, a ``done`` flag (presumably marking episode
    termination — confirm against the environment), and a free-form ``info``
    mapping.
    """

    # State of the facility as a DCObservation.
    observation: DCObservation

    # Scalar reward and its per-component detail.
    reward: float
    reward_detail: DCReward

    done: bool

    # Auxiliary key/value data; each instance gets its own empty dict by default.
    info: Dict[str, Any] = Field(
        default_factory=dict,
    )


class ResetResult(BaseModel):
    """Bundle of values produced by a reset: an observation plus ``info``.

    Unlike ``StepResult`` it carries no reward or done flag.
    """

    # Facility observation as a DCObservation.
    observation: DCObservation

    # Auxiliary key/value data; each instance gets its own empty dict by default.
    info: Dict[str, Any] = Field(
        default_factory=dict,
    )