adityss committed on
Commit
4c1963b
·
1 Parent(s): baea0a8

Enhance dashboard: Live Simulation, 72h episodes, and step reward tracking curve

Browse files
dashboard/static/dashboard.js CHANGED
@@ -7,7 +7,7 @@
7
 
8
  // ── Config ──────────────────────────────────────────────────────────────────
9
  const POLL_MS = 500;
10
- const HISTORY_LEN = 96; // 96 steps = full episode
11
  const API_BASE = '/api';
12
  const TASK_NAMES = {
13
  1: 'Task 1 — Cost Minimization (Easy)',
@@ -95,8 +95,8 @@ function makeBarChart(id, labels, datasets) {
95
  }
96
 
97
  // ── Initialise all charts ─────────────────────────────────────────────────────
98
- const emptyLabels = Array.from({ length: 24 }, (_, i) => `${i}h`);
99
- const emptyData = Array(24).fill(null);
100
 
101
  // 1. Price curve
102
  const priceChart = makeLineChart('chart-price',
@@ -258,16 +258,13 @@ const carbonChart = makeLineChart('chart-carbon',
258
  { yAxis: { title: { display: true, text: 'gCO₂/kWh' } } }
259
  );
260
 
261
- // 8. Reward components bar
262
- const rewardChart = makeBarChart('chart-reward',
263
  [],
264
  [
265
- { label: 'Cost Savings', data: [], backgroundColor: rgba(COLORS.green, 0.8) },
266
- { label: 'Temp Constraint',data: [], backgroundColor: rgba(COLORS.cyan, 0.8) },
267
- { label: 'Grid Response', data: [], backgroundColor: rgba(COLORS.blue, 0.8) },
268
- { label: 'Efficiency', data: [], backgroundColor: rgba(COLORS.purple, 0.7) },
269
- { label: 'Penalties', data: [], backgroundColor: rgba(COLORS.red, 0.8) },
270
- ]
271
  );
272
 
273
  // ── Stress meter bars ────────────────────────────────────────────────────────
@@ -309,7 +306,7 @@ function renderGantt(jobs, currentStep) {
309
  wrap.innerHTML = '<div style="color:var(--text-dim);font-size:0.8rem">No batch jobs in this episode.</div>';
310
  return;
311
  }
312
- const totalSlots = 96;
313
  wrap.innerHTML = '';
314
  jobs.forEach(job => {
315
  const row = document.createElement('div');
@@ -419,7 +416,7 @@ async function fetchAndUpdate() {
419
  const hourOfDay = b.hour_of_day || 0;
420
 
421
  // ── Header ──
422
- document.getElementById('ep-step').textContent = `ep:${state.episode} step:${step}/95`;
423
  document.getElementById('task-badge').textContent = TASK_NAMES[state.task_id] || 'Task 1';
424
 
425
  // ── KPIs ──
@@ -447,21 +444,21 @@ async function fetchAndUpdate() {
447
  document.getElementById('kpi-storage').textContent = `${(b.thermal_storage_level * 100).toFixed(1)}`;
448
 
449
  // ── Price curve chart ──
450
- if (state.price_curve_24h && state.price_curve_24h.length === 24) {
451
- const labels = Array.from({ length: 24 }, (_, i) => `${i}:00`);
452
  priceChart.data.labels = labels;
453
- priceChart.data.datasets[0].data = state.price_curve_24h;
454
  // Current position marker
455
- const marker = Array(24).fill(null);
456
- marker[hourOfDay] = state.price_curve_24h[hourOfDay];
457
  priceChart.data.datasets[1].data = marker;
458
  priceChart.update('none');
459
  }
460
 
461
  // ── Carbon curve ──
462
- if (state.carbon_curve_24h && state.carbon_curve_24h.length === 24) {
463
- carbonChart.data.labels = Array.from({ length: 24 }, (_, i) => `${i}:00`);
464
- carbonChart.data.datasets[0].data = state.carbon_curve_24h;
465
  carbonChart.update('none');
466
  }
467
 
@@ -530,16 +527,9 @@ async function fetchAndUpdate() {
530
  stressChart.data.datasets[0].data = b.reward_history.map(r => Math.max(0, r.grid_response || 0));
531
  stressChart.update('none');
532
 
533
- // Reward breakdown chart (last 20 steps)
534
- const recent = b.reward_history.slice(-20);
535
- rewardChart.data.labels = Array.from({ length: recent.length }, (_, i) => n - recent.length + i);
536
- rewardChart.data.datasets[0].data = recent.map(r => Math.max(0, r.cost_savings || 0));
537
- rewardChart.data.datasets[1].data = recent.map(r => Math.max(0, r.temp_constraint || 0));
538
- rewardChart.data.datasets[2].data = recent.map(r => Math.max(0, r.grid_response || 0));
539
- rewardChart.data.datasets[3].data = recent.map(r => Math.max(0, r.efficiency_bonus || 0));
540
- rewardChart.data.datasets[4].data = recent.map(r =>
541
- Math.abs(r.deadline_penalty || 0) + Math.abs(r.stability_penalty || 0)
542
- );
543
  rewardChart.update('none');
544
 
545
  // Reward rows (last step)
@@ -580,6 +570,46 @@ async function doReset() {
580
  document.getElementById('grade-result').textContent = '';
581
  }
582
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
583
  async function doGrade() {
584
  try {
585
  const res = await fetch(`${API_BASE}/grade`);
 
7
 
8
  // ── Config ──────────────────────────────────────────────────────────────────
9
  const POLL_MS = 500;
10
+ const HISTORY_LEN = 288; // 288 steps = full episode
11
  const API_BASE = '/api';
12
  const TASK_NAMES = {
13
  1: 'Task 1 — Cost Minimization (Easy)',
 
95
  }
96
 
97
  // ── Initialise all charts ─────────────────────────────────────────────────────
98
+ const emptyLabels = Array.from({ length: 72 }, (_, i) => `${i}h`);
99
+ const emptyData = Array(72).fill(null);
100
 
101
  // 1. Price curve
102
  const priceChart = makeLineChart('chart-price',
 
258
  { yAxis: { title: { display: true, text: 'gCO₂/kWh' } } }
259
  );
260
 
261
+ // 8. Reward timeline curve
262
+ const rewardChart = makeLineChart('chart-reward',
263
  [],
264
  [
265
+ { label: 'Step Reward', data: [], borderColor: COLORS.green, backgroundColor: rgba(COLORS.green, 0.1), borderWidth: 2, fill: true, tension: 0.4, pointRadius: 0 },
266
+ ],
267
+ { yAxis: { title: { display: true, text: 'Reward' } } }
 
 
 
268
  );
269
 
270
  // ── Stress meter bars ────────────────────────────────────────────────────────
 
306
  wrap.innerHTML = '<div style="color:var(--text-dim);font-size:0.8rem">No batch jobs in this episode.</div>';
307
  return;
308
  }
309
+ const totalSlots = 288;
310
  wrap.innerHTML = '';
311
  jobs.forEach(job => {
312
  const row = document.createElement('div');
 
416
  const hourOfDay = b.hour_of_day || 0;
417
 
418
  // ── Header ──
419
+ document.getElementById('ep-step').textContent = `ep:${state.episode} step:${step}/287`;
420
  document.getElementById('task-badge').textContent = TASK_NAMES[state.task_id] || 'Task 1';
421
 
422
  // ── KPIs ──
 
444
  document.getElementById('kpi-storage').textContent = `${(b.thermal_storage_level * 100).toFixed(1)}`;
445
 
446
  // ── Price curve chart ──
447
+ if (state.price_curve_episode && state.price_curve_episode.length === 72) {
448
+ const labels = Array.from({ length: 72 }, (_, i) => `${i}:00`);
449
  priceChart.data.labels = labels;
450
+ priceChart.data.datasets[0].data = state.price_curve_episode;
451
  // Current position marker
452
+ const marker = Array(72).fill(null);
453
+ marker[Math.floor(step / 4)] = state.price_curve_episode[Math.floor(step / 4)];
454
  priceChart.data.datasets[1].data = marker;
455
  priceChart.update('none');
456
  }
457
 
458
  // ── Carbon curve ──
459
+ if (state.carbon_curve_episode && state.carbon_curve_episode.length === 72) {
460
+ carbonChart.data.labels = Array.from({ length: 72 }, (_, i) => `${i}:00`);
461
+ carbonChart.data.datasets[0].data = state.carbon_curve_episode;
462
  carbonChart.update('none');
463
  }
464
 
 
527
  stressChart.data.datasets[0].data = b.reward_history.map(r => Math.max(0, r.grid_response || 0));
528
  stressChart.update('none');
529
 
530
+ // Total reward timeline chart (full episode)
531
+ rewardChart.data.labels = Array.from({ length: n }, (_, i) => i);
532
+ rewardChart.data.datasets[0].data = b.reward_history.map(r => r.total || 0);
 
 
 
 
 
 
 
533
  rewardChart.update('none');
534
 
535
  // Reward rows (last step)
 
570
  document.getElementById('grade-result').textContent = '';
571
  }
572
 
// Handle of the running live-simulation interval, or null when stopped.
let liveSimTimer = null;
// True while the live simulation is running (drives the button state).
let isLiveSimulating = false;
// True while a /step request is pending, so ticks never overlap.
let liveSimStepInFlight = false;

/**
 * Toggle the live simulation on or off.
 *
 * While running, one fixed action is POSTed to `${API_BASE}/step` roughly
 * every 400 ms for the currently selected building. The dashboard itself is
 * refreshed by the regular polling loop, not by this function.
 *
 * NOTE(review): the timer keeps stepping after the episode finishes; if the
 * step response exposes a completion flag, consider stopping on it — confirm
 * the response shape first.
 */
function toggleLiveSim() {
  const btn = document.getElementById('btn-live');

  if (isLiveSimulating) {
    // Stop live sim
    clearInterval(liveSimTimer);
    liveSimTimer = null;
    isLiveSimulating = false;
    btn.textContent = '▶ Start Live Simulation';
    btn.style.background = 'var(--accent-green)';
    return;
  }

  // Start live sim
  isLiveSimulating = true;
  btn.textContent = '⏸ Pause Live Simulation';
  btn.style.background = 'var(--accent-amber)';

  liveSimTimer = setInterval(async () => {
    // Skip this tick if the previous request has not completed yet; otherwise
    // slow responses would pile up and steps could be applied out of order.
    if (liveSimStepInFlight) {
      return;
    }
    liveSimStepInFlight = true;
    try {
      // Step the environment automatically with a fixed baseline action.
      const res = await fetch(`${API_BASE}/step`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          hvac_power_level: 0.5,
          thermal_charge_rate: 0.0,
          batch_job_slot: 0,
          load_shed_fraction: 0.0,
          building_id: currentBuilding,
        }),
      });
      if (!res.ok) {
        // fetch() only rejects on network errors; surface HTTP failures too.
        console.error(`Live simulation step failed: HTTP ${res.status}`);
      }
      // fetchAndUpdate() will catch the change via polling
    } catch (e) {
      console.error(e);
    } finally {
      liveSimStepInFlight = false;
    }
  }, 400); // 400ms per step
}
612
+
613
  async function doGrade() {
614
  try {
615
  const res = await fetch(`${API_BASE}/grade`);
dashboard/static/index.html CHANGED
@@ -514,7 +514,7 @@
514
 
515
  <!-- Row 1: Price curve + Temperature + Controls -->
516
  <div class="card col-8">
517
- <div class="card-title"><span class="icon">💰</span> 24h Electricity Price Curve</div>
518
  <div class="chart-wrap">
519
  <canvas id="chart-price"></canvas>
520
  </div>
@@ -587,7 +587,7 @@
587
  </div>
588
 
589
  <div class="card col-6">
590
- <div class="card-title"><span class="icon">🌍</span> Carbon Intensity Curve (24h)</div>
591
  <div class="chart-wrap">
592
  <canvas id="chart-carbon"></canvas>
593
  </div>
@@ -608,6 +608,7 @@
608
  <option value="2">Building 3</option>
609
  </select>
610
  <button id="btn-reset" class="btn primary" onclick="doReset()">↺ New Episode</button>
 
611
  <button class="btn" onclick="doGrade()">📋 Grade Episode</button>
612
  <button class="btn" onclick="window.open('/api/replay')">📥 Export Replay</button>
613
  <span id="grade-result" style="font-family:var(--font-mono);font-size:0.9rem;color:var(--accent-green)"></span>
 
514
 
515
  <!-- Row 1: Price curve + Temperature + Controls -->
516
  <div class="card col-8">
517
+ <div class="card-title"><span class="icon">💰</span> Electricity Price Curve (72h)</div>
518
  <div class="chart-wrap">
519
  <canvas id="chart-price"></canvas>
520
  </div>
 
587
  </div>
588
 
589
  <div class="card col-6">
590
+ <div class="card-title"><span class="icon">🌍</span> Carbon Intensity Curve (72h)</div>
591
  <div class="chart-wrap">
592
  <canvas id="chart-carbon"></canvas>
593
  </div>
 
608
  <option value="2">Building 3</option>
609
  </select>
610
  <button id="btn-reset" class="btn primary" onclick="doReset()">↺ New Episode</button>
611
+ <button id="btn-live" class="btn" style="background:var(--accent-green);color:#fff;border:none;" onclick="toggleLiveSim()">▶ Start Live Simulation</button>
612
  <button class="btn" onclick="doGrade()">📋 Grade Episode</button>
613
  <button class="btn" onclick="window.open('/api/replay')">📥 Export Replay</button>
614
  <span id="grade-result" style="font-family:var(--font-mono);font-size:0.9rem;color:var(--accent-green)"></span>
env/environment.go CHANGED
@@ -11,7 +11,7 @@ import (
11
  )
12
 
13
  const (
14
- EpisodeSteps = 96 // 24 hours × 15-min intervals
15
  StepDurationHrs = 0.25 // each step = 15 minutes = 0.25 h
16
  MaxBuildings = 3
17
  DefaultSetpoint = 21.0 // °C comfortable indoor temp
@@ -219,9 +219,9 @@ func (e *Environment) GetState() StateResponse {
219
  buildings[i] = pub
220
  }
221
 
222
- priceCurve := make([]float64, 24)
223
- carbonCurve := make([]float64, 24)
224
- for h := 0; h < 24; h++ {
225
  stepIdx := h * 4
226
  if stepIdx < EpisodeSteps {
227
  priceCurve[h] = e.PriceCurve[stepIdx]
 
11
  )
12
 
13
  const (
14
+ EpisodeSteps = 288 // 72 hours × 15-min intervals
15
  StepDurationHrs = 0.25 // each step = 15 minutes = 0.25 h
16
  MaxBuildings = 3
17
  DefaultSetpoint = 21.0 // °C comfortable indoor temp
 
219
  buildings[i] = pub
220
  }
221
 
222
+ priceCurve := make([]float64, EpisodeSteps/4)
223
+ carbonCurve := make([]float64, EpisodeSteps/4)
224
+ for h := 0; h < EpisodeSteps/4; h++ {
225
  stepIdx := h * 4
226
  if stepIdx < EpisodeSteps {
227
  priceCurve[h] = e.PriceCurve[stepIdx]
env/models.go CHANGED
@@ -124,8 +124,8 @@ type ResetResponse struct {
124
  // StateResponse is returned from GET /state.
125
  type StateResponse struct {
126
  Buildings []BuildingStatePublic `json:"buildings"`
127
- PriceCurve []float64 `json:"price_curve_24h"` // full 24h ToU prices
128
- CarbonCurve []float64 `json:"carbon_curve_24h"` // full 24h carbon intensities
129
  Episode int `json:"episode"`
130
  Step int `json:"step"`
131
  TaskID int `json:"task_id"`
 
124
  // StateResponse is returned from GET /state.
125
  type StateResponse struct {
126
  Buildings []BuildingStatePublic `json:"buildings"`
127
+ PriceCurve []float64 `json:"price_curve_episode"` // full episode ToU prices
128
+ CarbonCurve []float64 `json:"carbon_curve_episode"` // full episode carbon intensities
129
  Episode int `json:"episode"`
130
  Step int `json:"step"`
131
  TaskID int `json:"task_id"`
python/inference.py CHANGED
@@ -239,7 +239,9 @@ def run_episode(env_client: GridMindEnvClient, agent: LLMAgent,
239
  total_steps = 0
240
  start_time = time.time()
241
 
242
- for _step in range(96):
 
 
243
  action = agent.choose_action(obs, task_id)
244
  step_resp = env_client.step(action)
245
 
@@ -253,9 +255,7 @@ def run_episode(env_client: GridMindEnvClient, agent: LLMAgent,
253
  f"stress={obs['grid_stress_signal']:.2f} "
254
  f"cost=${obs['cumulative_cost']:.2f} "
255
  f"reward={step_resp['reward']:.3f}")
256
-
257
- if step_resp.get("done", False):
258
- break
259
 
260
  elapsed = time.time() - start_time
261
  grade = env_client.grade()
 
239
  total_steps = 0
240
  start_time = time.time()
241
 
242
+ step_resp = {}
243
+ _step = 0
244
+ while not step_resp.get("done", False):
245
  action = agent.choose_action(obs, task_id)
246
  step_resp = env_client.step(action)
247
 
 
255
  f"stress={obs['grid_stress_signal']:.2f} "
256
  f"cost=${obs['cumulative_cost']:.2f} "
257
  f"reward={step_resp['reward']:.3f}")
258
+ _step += 1
 
 
259
 
260
  elapsed = time.time() - start_time
261
  grade = env_client.grade()
python/models.py CHANGED
@@ -29,7 +29,7 @@ class ObservationModel(BaseModel):
29
  hour_of_day: int = Field(..., ge=0, le=23, description="Current hour of day (0–23)")
30
  batch_queue: List[int] = Field(default_factory=list, description="Deadline slots of pending batch jobs")
31
  cumulative_cost: float = Field(..., ge=0.0, description="Running energy cost this episode ($)")
32
- step: int = Field(..., ge=0, description="Current timestep (0–95)")
33
  building_id: int = Field(default=0, description="Building index in federation")
34
 
35
 
@@ -137,8 +137,8 @@ class BuildingStatePublic(BaseModel):
137
  class StateResponse(BaseModel):
138
  """Full environment state from GET /state."""
139
  buildings: List[BuildingStatePublic]
140
- price_curve_24h: List[float]
141
- carbon_curve_24h: List[float]
142
  episode: int
143
  step: int
144
  task_id: int
 
29
  hour_of_day: int = Field(..., ge=0, le=23, description="Current hour of day (0–23)")
30
  batch_queue: List[int] = Field(default_factory=list, description="Deadline slots of pending batch jobs")
31
  cumulative_cost: float = Field(..., ge=0.0, description="Running energy cost this episode ($)")
32
+ step: int = Field(..., ge=0, description="Current timestep (0–287)")
33
  building_id: int = Field(default=0, description="Building index in federation")
34
 
35
 
 
137
  class StateResponse(BaseModel):
138
  """Full environment state from GET /state."""
139
  buildings: List[BuildingStatePublic]
140
+ price_curve_episode: List[float]
141
+ carbon_curve_episode: List[float]
142
  episode: int
143
  step: int
144
  task_id: int
python/validate.py CHANGED
@@ -168,8 +168,11 @@ def validate(env_url: str) -> bool:
168
  post(f"{base}/reset", {"task_id": 1, "seed": 777})
169
  action = {"hvac_power_level": 0.3, "thermal_charge_rate": 0.0,
170
  "batch_job_slot": 0, "load_shed_fraction": 0.0}
171
- for _ in range(10):
172
- post(f"{base}/step", action)
 
 
 
173
  r = get(f"{base}/grade")
174
  results.append(check("GET /grade returns 200", r.status_code == 200))
175
  grade = r.json()
@@ -217,9 +220,12 @@ def validate(env_url: str) -> bool:
217
  # Two different policies
218
  for a in [0.1, 0.9]:
219
  post(f"{base}/reset", {"task_id": 1, "seed": seed})
220
- for _ in range(96):
221
- post(f"{base}/step", {"hvac_power_level": a, "thermal_charge_rate": 0,
 
222
  "batch_job_slot": 0, "load_shed_fraction": 0})
 
 
223
  g = requests.get(f"{base}/grade", timeout=10).json()
224
  sc = g.get("score", 0)
225
  scores_nonzero.append(sc > 0.01)
 
168
  post(f"{base}/reset", {"task_id": 1, "seed": 777})
169
  action = {"hvac_power_level": 0.3, "thermal_charge_rate": 0.0,
170
  "batch_job_slot": 0, "load_shed_fraction": 0.0}
171
+ done = False
172
+ while not done:
173
+ r2 = post(f"{base}/step", action)
174
+ if r2.json().get("done"):
175
+ done = True
176
  r = get(f"{base}/grade")
177
  results.append(check("GET /grade returns 200", r.status_code == 200))
178
  grade = r.json()
 
220
  # Two different policies
221
  for a in [0.1, 0.9]:
222
  post(f"{base}/reset", {"task_id": 1, "seed": seed})
223
+ done = False
224
+ while not done:
225
+ r2 = post(f"{base}/step", {"hvac_power_level": a, "thermal_charge_rate": 0,
226
  "batch_job_slot": 0, "load_shed_fraction": 0})
227
+ if r2.json().get("done"):
228
+ done = True
229
  g = requests.get(f"{base}/grade", timeout=10).json()
230
  sc = g.get("score", 0)
231
  scores_nonzero.append(sc > 0.01)
tests/environment_test.go CHANGED
@@ -57,19 +57,19 @@ func TestStepAdvancesState(t *testing.T) {
57
  }
58
  }
59
 
60
- // TestEpisodeLengthIs96 verifies the episode terminates at step 96.
61
- func TestEpisodeLengthIs96(t *testing.T) {
62
  e := env.NewEnvironment()
63
  var seed int64 = 99
64
  e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
65
 
66
  action := []env.ActionModel{{HVACPowerLevel: 0.5}}
67
  var lastDone bool
68
- for i := 0; i < 96; i++ {
69
  _, lastDone = e.Step(action)
70
  }
71
  if !lastDone {
72
- t.Errorf("episode should be done after 96 steps")
73
  }
74
  }
75
 
@@ -162,7 +162,7 @@ func TestGraderTask1ScoreRange(t *testing.T) {
162
  e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})
163
 
164
  action := []env.ActionModel{{HVACPowerLevel: 0.3}}
165
- for i := 0; i < 96; i++ {
166
  e.Step(action)
167
  }
168
 
 
57
  }
58
  }
59
 
60
+ // TestEpisodeLengthIs288 verifies the episode terminates at step 288.
61
+ func TestEpisodeLengthIs288(t *testing.T) {
62
  e := env.NewEnvironment()
63
  var seed int64 = 99
64
  e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
65
 
66
  action := []env.ActionModel{{HVACPowerLevel: 0.5}}
67
  var lastDone bool
68
+ for i := 0; i < 288; i++ {
69
  _, lastDone = e.Step(action)
70
  }
71
  if !lastDone {
72
+ t.Errorf("episode should be done after 288 steps")
73
  }
74
  }
75
 
 
162
  e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})
163
 
164
  action := []env.ActionModel{{HVACPowerLevel: 0.3}}
165
+ for i := 0; i < 288; i++ {
166
  e.Step(action)
167
  }
168
 
tests/test_graders.py CHANGED
@@ -51,10 +51,11 @@ def grade() -> dict:
51
  def run_full_episode(task_id: int, seed: int, hvac: float = 0.5) -> dict:
52
  reset(task_id=task_id, seed=seed)
53
  action = {"hvac_power_level": hvac, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0}
54
- for _ in range(96):
 
55
  resp = step(action)
56
  if resp.get("done"):
57
- break
58
  return grade()
59
 
60
 
@@ -86,8 +87,11 @@ class TestTask1:
86
  """Always shedding 50% should be detected and penalized."""
87
  reset(task_id=1, seed=10)
88
  action = {"hvac_power_level": 0.5, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0.5}
89
- for _ in range(96):
90
- step(action)
 
 
 
91
  g = grade()
92
  # Score should be reduced OR exploit flagged
93
  assert g["exploit_detected"] or g["score"] < 0.9
@@ -165,9 +169,10 @@ class TestMultiBuilding:
165
  {"hvac_power_level": 0.4, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0, "building_id": 0},
166
  {"hvac_power_level": 0.6, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0, "building_id": 1},
167
  ]
168
- for _ in range(96):
 
169
  r = requests.post(f"{BASE}/step", json=action)
170
  if r.json()[0].get("done"):
171
- break
172
  g = grade()
173
  assert 0.0 <= g["score"] <= 1.0
 
def run_full_episode(task_id: int, seed: int, hvac: float = 0.5) -> dict:
    """Reset the environment, step a constant action until done, and grade.

    Args:
        task_id: Task identifier passed to ``reset``.
        seed: Seed passed to ``reset``.
        hvac: Constant ``hvac_power_level`` sent on every step.

    Returns:
        The dict returned by ``grade()`` after the episode finishes.

    Raises:
        RuntimeError: If the server never reports ``done`` within the step cap.
    """
    reset(task_id=task_id, seed=seed)
    action = {"hvac_power_level": hvac, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0}
    # Cap the loop so a server that never reports "done" fails the test
    # instead of hanging the whole suite; the cap is far above any episode length.
    max_steps = 10_000
    for _ in range(max_steps):
        resp = step(action)
        if resp.get("done"):
            break
    else:
        raise RuntimeError(f"episode did not finish within {max_steps} steps")
    return grade()
60
 
61
 
 
87
  """Always shedding 50% should be detected and penalized."""
88
  reset(task_id=1, seed=10)
89
  action = {"hvac_power_level": 0.5, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0.5}
90
+ done = False
91
+ while not done:
92
+ resp = step(action)
93
+ if resp.get("done"):
94
+ done = True
95
  g = grade()
96
  # Score should be reduced OR exploit flagged
97
  assert g["exploit_detected"] or g["score"] < 0.9
 
169
  {"hvac_power_level": 0.4, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0, "building_id": 0},
170
  {"hvac_power_level": 0.6, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0, "building_id": 1},
171
  ]
172
+ done = False
173
+ while not done:
174
  r = requests.post(f"{BASE}/step", json=action)
175
  if r.json()[0].get("done"):
176
+ done = True
177
  g = grade()
178
  assert 0.0 <= g["score"] <= 1.0