File size: 12,988 Bytes
1875b13
 
 
 
 
 
0af208b
1875b13
a43b40b
1875b13
60cbc23
1875b13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6d74982
1875b13
 
 
0af208b
 
 
 
 
 
 
 
 
 
3b977fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0af208b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a43b40b
1875b13
 
a43b40b
 
 
 
 
 
 
1875b13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6d74982
1875b13
0af208b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b977fc
0af208b
 
 
3b977fc
 
 
 
0af208b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1875b13
 
 
 
 
0361922
 
1875b13
 
 
 
0361922
 
 
1875b13
 
 
 
0361922
 
 
 
 
 
0af208b
 
 
 
 
 
 
 
1875b13
 
 
 
 
3b977fc
6d74982
 
 
3b977fc
1875b13
 
 
0af208b
 
 
1875b13
 
 
0af208b
 
 
1875b13
 
 
0af208b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1875b13
 
 
0af208b
 
 
 
 
 
 
 
 
 
 
 
 
 
1875b13
 
 
0af208b
 
 
 
 
 
 
 
1875b13
 
 
0af208b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0361922
 
 
0af208b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
# Manifest identity and discovery metadata for the environment.
name: gridmind-rl
# Quoted so the version is always loaded as a string.
version: '1.0.0'
# Literal block scalar: the line breaks below are preserved as written.
description: |
  GridMind-RL: Industrial Load-Shaping and Demand-Response Environment.
  An RL environment simulating a real-world building energy management system.
  Control HVAC, thermal storage, and schedule batch jobs in response to
  stochastic electricity prices, grid stress events, and natural language objectives.

author: 'LOKyu Team'
# Tags attached to this environment.
tags:
  - 'openenv'
  - 'reinforcement-learning'
  - 'energy'
  - 'demand-response'
  - 'continuous-control'
  - 'discrete-control'

# Address, port and protocol the environment server is reached on.
server:
  host: '0.0.0.0'
  port: 7860
  protocol: 'http'

# JSON-Schema-style definitions; other entries refer to them as "#/schemas/<name>".
schemas:
  # Environment observation object (referenced by reset_response and step_response).
  observation:
    type: object
    properties:
      # --- Thermal and process state ---
      indoor_temperature:
        description: 'Current building/process temperature (°C)'
        type: number
      thermal_storage_level:
        description: 'Thermal storage tank level (0.0=empty, 1.0=full)'
        type: number
        minimum: 0.0
        maximum: 1.0
      process_demand:
        description: 'Current process industrial power demand (kW)'
        type: number
      # --- Price, grid and carbon signals ---
      current_price:
        description: 'Real-time electricity price ($/kWh)'
        type: number
      grid_stress_signal:
        description: 'Utility signal for DR urgency (0=normal, 1=critical)'
        type: number
        minimum: 0.0
        maximum: 1.0
      carbon_intensity:
        description: 'Grid carbon intensity (gCO2/kWh)'
        type: number
      # --- Time, scheduling and bookkeeping ---
      hour_of_day:
        description: 'Hour of the day for time-aware scheduling'
        type: integer
        minimum: 0
        maximum: 23
      batch_queue:
        description: 'List of pending batch jobs (values are deadline slot indices)'
        type: array
        items:
          type: integer
      cumulative_cost:
        description: 'Total energy cost incurred so far this episode ($)'
        type: number
      step:
        description: 'Current episode step index (0–95); 96 steps = 24 hours at 15-min resolution'
        type: integer
        minimum: 0
        maximum: 95
      building_id:
        description: 'Building identifier for multi-building federation'
        type: integer
      # --- Equipment health ---
      hvac_efficiency:
        description: 'Current HVAC efficiency multiplier (1.0=new, degrades over episode). Track 5.'
        type: number
        minimum: 0.0
        maximum: 1.0
      active_faults:
        description: 'Human-readable list of active fault alarm strings. Empty when no faults. Track 3.'
        type: array
        items:
          type: string
      # NOTE(review): the seven fields below carry no description in this
      # manifest; confirm their meaning against the server implementation.
      task_card:
        type: string
      nl_summary:
        type: string
      market_type:
        type: string
      season:
        type: string
      price_volatility:
        type: number
      price_forecast:
        type: array
        items:
          type: number
      demand_charge_active:
        type: boolean
      # Nullable object: the type list allows either an object or null.
      instruction_card:
        description: 'Natural language objective card. Only populated when task_id=4. Track 2.'
        type:
          - object
          - 'null'
        properties:
          text:
            description: 'Human-readable instruction for the episode.'
            type: string
          targets:
            description: 'Machine-readable KPI targets keyed by metric name.'
            type: object
            additionalProperties:
              type: number
          weights:
            description: 'Scoring weights for each KPI target.'
            type: object
            additionalProperties:
              type: number

  # Control action for one building; the four control fields are mandatory,
  # building_id is optional.
  action:
    type: object
    required:
      - hvac_power_level
      - thermal_charge_rate
      - batch_job_slot
      - load_shed_fraction
    properties:
      hvac_power_level:
        description: 'Fraction of max HVAC power to apply (0.0-1.0)'
        type: number
        minimum: 0.0
        maximum: 1.0
      # Signed rate: the range is symmetric around zero.
      thermal_charge_rate:
        description: 'Thermal storage charge (+) or discharge (-) rate'
        type: number
        minimum: -1.0
        maximum: 1.0
      batch_job_slot:
        description: 'Which time slot to schedule the next batch job (0=now, 1-4=defer)'
        type: integer
        minimum: 0
        maximum: 4
      # Upper bound is 0.5, not 1.0.
      load_shed_fraction:
        description: 'Fraction of non-critical load to shed during grid stress (0.0-0.5)'
        type: number
        minimum: 0.0
        maximum: 0.5
      building_id:
        description: 'Building identifier for multi-building federation'
        type: integer

  # Scalar reward value.
  reward:
    description: 'Dense multi-component reward (cost, optional temperature/grid/carbon/deadlines) task-gated to match objectives.'
    type: number

  # Request body for the reset endpoint; no field is listed as required.
  reset_request:
    type: object
    properties:
      seed:
        description: 'Optional random seed for reproducibility'
        type: integer
      # Range 1-4 matches the ids declared under the top-level `tasks` list.
      task_id:
        description: 'Task ID (1-4): 1=cost, 2=temp, 3=demand_response, 4=instruction_following'
        type: integer
        minimum: 1
        maximum: 4
      difficulty:
        description: 'Task difficulty override'
        type: string
        enum:
          - easy
          - medium
          - hard
      num_buildings:
        description: 'Number of buildings in federation for multi-agent demo'
        type: integer
        minimum: 1
        maximum: 3

  # Response body for the reset endpoint.
  reset_response:
    type: object
    properties:
      # A list of observation objects.
      observations:
        type: array
        items:
          $ref: '#/schemas/observation'
      episode:
        description: 'Current episode number'
        type: integer
      task_id:
        description: 'Task ID for this episode'
        type: integer
      seed:
        description: 'Random seed used'
        type: integer
      # Reuses the instruction_card definition nested inside the observation schema.
      instruction_card:
        $ref: '#/schemas/observation/properties/instruction_card'

  # Request body for the step endpoint: either ONE action object or a LIST of
  # actions (multi-building).
  #
  # `oneOf` is used so that both shapes are validated against the action
  # schema. A bare `type: [object, array]` combined with `items` would only
  # constrain the array form, because `items` is ignored for non-array
  # instances; a single object would then pass without any of the action
  # schema's `required`/range checks being applied.
  step_request:
    description: Single action object or array of actions for multi-building
    oneOf:
      # Single-building form: one action object.
      - $ref: "#/schemas/action"
      # Multi-building form: a list of action objects.
      - type: array
        items:
          $ref: "#/schemas/action"

  # Response body for the step endpoint: observation, reward, done flag and
  # a diagnostics object.
  step_response:
    type: object
    properties:
      observation:
        $ref: '#/schemas/observation'
      reward:
        description: 'Total reward for this step'
        type: number
      done:
        description: 'Episode complete flag'
        type: boolean
      info:
        type: object
        properties:
          # Named numeric reward terms plus a `total` entry.
          reward_components:
            type: object
            properties:
              cost_savings:
                type: number
              temp_constraint:
                type: number
              grid_response:
                type: number
              deadline_penalty:
                type: number
              efficiency_bonus:
                type: number
              stability_penalty:
                type: number
              carbon_reward:
                type: number
              task_satisfaction:
                type: number
              fault_mitigation:
                type: number
              price_anticipation:
                type: number
              demand_charge_penalty:
                type: number
              total:
                type: number
          # Numeric diagnostics.
          energy_used_kwh:
            type: number
          carbon_emitted_gco2:
            type: number
          price_signal:
            type: number
          grid_stress:
            type: number
          # Integer lists for batch jobs.
          # NOTE(review): presumably job identifiers - confirm with the server.
          batch_completed:
            type: array
            items:
              type: integer
          batch_missed:
            type: array
            items:
              type: integer
          episode:
            type: integer
          step:
            type: integer

  # Aggregate fleet view: feeder-level totals plus a per-building summary list.
  feeder_state:
    type: object
    properties:
      total_demand_kw:
        description: 'Total fleet demand in kW'
        type: number
      feeder_limit_kw:
        description: 'Feeder capacity limit'
        type: number
      feeder_overload:
        description: 'Whether total demand exceeds limit'
        type: boolean
      utilization_pct:
        description: 'Utilization percentage'
        type: number
      # One summary object per building.
      buildings:
        type: array
        items:
          type: object
          properties:
            building_id:
              type: integer
            current_demand_kw:
              type: number
            indoor_temperature:
              type: number
            thermal_storage_level:
              type: number
            cumulative_cost:
              type: number
            grid_stress_signal:
              type: number
            price_multiplier:
              type: number
      price_curve_hourly:
        description: '24-point hourly price curve'
        type: array
        items:
          type: number
      step:
        type: integer
      episode:
        type: integer

  # Request body for the coordinate endpoint.
  coordinate_request:
    type: object
    properties:
      price_multipliers:
        description: 'Per-building price multipliers (default 1.0)'
        type: array
        items:
          type: number

  # Request body for the simulate endpoint: a list of action objects.
  simulate_request:
    description: 'Array of actions to simulate'
    type: array
    items:
      $ref: '#/schemas/action'

  # Response body for the simulate endpoint.
  simulate_response:
    type: object
    properties:
      # Each entry has the same shape as a step response.
      results:
        type: array
        items:
          $ref: '#/schemas/step_response'
      done:
        description: 'Whether episode would be done after simulated step'
        type: boolean

# Task catalogue. Ids 1-4 line up with the task_id range in reset_request.
# The weights listed for each task sum to 1.0.
tasks:
  # Task 1: cost only.
  - id: 1
    name: 'Cost Minimization'
    difficulty: 'easy'
    description: 'Minimize total energy cost over a 24-hour episode with no process constraints.'
    weights:
      cost: 1.0
  # Task 2: cost plus temperature band.
  - id: 2
    name: 'Constrained Temperature Management'
    difficulty: 'medium'
    description: 'Minimize cost while keeping indoor temperature within ±2°C of setpoint at all times.'
    weights:
      cost: 0.6
      temperature: 0.4
  # Task 3: five weighted objectives.
  - id: 3
    name: 'Full Demand-Response with Batch Scheduling'
    difficulty: 'hard'
    description: 'Minimize cost, maintain temperature, respond to grid stress events, schedule all batch jobs, and minimize carbon.'
    weights:
      cost: 0.28
      temperature: 0.20
      grid_response: 0.20
      batch_deadline: 0.12
      carbon: 0.20
  # Task 4: instruction following.
  - id: 4
    name: 'Instruction-Following Operator'
    difficulty: 'hard'
    description: 'Complete a randomly sampled natural-language objective card specifying KPI targets for cost, temperature, and carbon over 24h.'
    weights:
      task_completion: 0.50
      cost: 0.30
      temperature: 0.20

# HTTP API surface. Request/response bodies are either inline schemas or
# "#/schemas/<name>" references into the `schemas` section.
endpoints:
  # --- Probes ---
  health:
    method: GET
    path: /health
    description: 'Health check - returns {"status": "ok", "version": "1.0.0"}'
  ping:
    method: GET
    path: /ping
    description: 'Liveness probe - returns {"status": "ok"}'
  # --- Episode control ---
  reset:
    method: POST
    path: /reset
    description: 'Start new episode'
    request_schema: '#/schemas/reset_request'
    response_schema: '#/schemas/reset_response'
  step:
    method: POST
    path: /step
    description: 'Execute action in environment'
    request_schema: '#/schemas/step_request'
    response_schema: '#/schemas/step_response'
  # --- Inspection ---
  state:
    method: GET
    path: /state
    description: 'Get current environment state'
    response_schema:
      type: object
      properties:
        # Building entries are untyped objects in this manifest.
        buildings:
          type: array
          items:
            type: object
        price_curve_episode:
          type: array
          items:
            type: number
        carbon_curve_episode:
          type: array
          items:
            type: number
        episode:
          type: integer
        step:
          type: integer
        task_id:
          type: integer
        done:
          type: boolean
        seed:
          type: integer
  grade:
    method: GET
    path: /grade
    description: 'Grade completed episode'
    response_schema:
      type: object
      properties:
        task_id:
          type: integer
        score:
          type: number
        sub_scores:
          type: object
        exploit_detected:
          type: boolean
        penalty_applied:
          type: number
  replay:
    method: GET
    path: /replay
    description: 'Get episode replay data'
    response_schema:
      type: object
      properties:
        # No `items` schema is given for the replay entries.
        replay:
          type: array
        steps:
          type: integer
  tasks:
    method: GET
    path: /tasks
    description: 'List available tasks'
    response_schema:
      type: array
      items:
        type: object
        properties:
          id:
            type: integer
          name:
            type: string
          description:
            type: string
          difficulty:
            type: string
          weights:
            type: object
  # Plain-text response rather than a JSON schema.
  metrics:
    method: GET
    path: /metrics
    description: 'Prometheus metrics'
    response_content_type: 'text/plain'
  # --- Multi-building coordination ---
  feeder:
    method: GET
    path: /feeder
    description: 'Get aggregate fleet state for coordinator'
    response_schema: '#/schemas/feeder_state'
  # No response schema is declared for this endpoint.
  coordinate:
    method: POST
    path: /coordinate
    description: 'Set per-building price multipliers from coordinator'
    request_schema: '#/schemas/coordinate_request'
  simulate:
    method: POST
    path: /simulate
    description: 'Simulate world model prediction without advancing environment'
    request_schema: '#/schemas/simulate_request'
    response_schema: '#/schemas/simulate_response'