Enhance dashboard: Live Simulation, 72h episodes, and step reward tracking curve
Browse files
- dashboard/static/dashboard.js +61 -31
- dashboard/static/index.html +3 -2
- env/environment.go +4 -4
- env/models.go +2 -2
- python/inference.py +4 -4
- python/models.py +3 -3
- python/validate.py +10 -4
- tests/environment_test.go +5 -5
- tests/test_graders.py +11 -6
dashboard/static/dashboard.js
CHANGED
|
@@ -7,7 +7,7 @@
|
|
| 7 |
|
| 8 |
// ── Config ──────────────────────────────────────────────────────────────────
|
| 9 |
const POLL_MS = 500;
|
| 10 |
-
const HISTORY_LEN =
|
| 11 |
const API_BASE = '/api';
|
| 12 |
const TASK_NAMES = {
|
| 13 |
1: 'Task 1 — Cost Minimization (Easy)',
|
|
@@ -95,8 +95,8 @@ function makeBarChart(id, labels, datasets) {
|
|
| 95 |
}
|
| 96 |
|
| 97 |
// ── Initialise all charts ─────────────────────────────────────────────────────
|
| 98 |
-
const emptyLabels = Array.from({ length:
|
| 99 |
-
const emptyData = Array(
|
| 100 |
|
| 101 |
// 1. Price curve
|
| 102 |
const priceChart = makeLineChart('chart-price',
|
|
@@ -258,16 +258,13 @@ const carbonChart = makeLineChart('chart-carbon',
|
|
| 258 |
{ yAxis: { title: { display: true, text: 'gCO₂/kWh' } } }
|
| 259 |
);
|
| 260 |
|
| 261 |
-
// 8. Reward
|
| 262 |
-
const rewardChart =
|
| 263 |
[],
|
| 264 |
[
|
| 265 |
-
{ label: '
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
{ label: 'Efficiency', data: [], backgroundColor: rgba(COLORS.purple, 0.7) },
|
| 269 |
-
{ label: 'Penalties', data: [], backgroundColor: rgba(COLORS.red, 0.8) },
|
| 270 |
-
]
|
| 271 |
);
|
| 272 |
|
| 273 |
// ── Stress meter bars ────────────────────────────────────────────────────────
|
|
@@ -309,7 +306,7 @@ function renderGantt(jobs, currentStep) {
|
|
| 309 |
wrap.innerHTML = '<div style="color:var(--text-dim);font-size:0.8rem">No batch jobs in this episode.</div>';
|
| 310 |
return;
|
| 311 |
}
|
| 312 |
-
const totalSlots =
|
| 313 |
wrap.innerHTML = '';
|
| 314 |
jobs.forEach(job => {
|
| 315 |
const row = document.createElement('div');
|
|
@@ -419,7 +416,7 @@ async function fetchAndUpdate() {
|
|
| 419 |
const hourOfDay = b.hour_of_day || 0;
|
| 420 |
|
| 421 |
// ── Header ──
|
| 422 |
-
document.getElementById('ep-step').textContent = `ep:${state.episode} step:${step}/
|
| 423 |
document.getElementById('task-badge').textContent = TASK_NAMES[state.task_id] || 'Task 1';
|
| 424 |
|
| 425 |
// ── KPIs ──
|
|
@@ -447,21 +444,21 @@ async function fetchAndUpdate() {
|
|
| 447 |
document.getElementById('kpi-storage').textContent = `${(b.thermal_storage_level * 100).toFixed(1)}`;
|
| 448 |
|
| 449 |
// ── Price curve chart ──
|
| 450 |
-
if (state.
|
| 451 |
-
const labels = Array.from({ length:
|
| 452 |
priceChart.data.labels = labels;
|
| 453 |
-
priceChart.data.datasets[0].data = state.
|
| 454 |
// Current position marker
|
| 455 |
-
const marker = Array(
|
| 456 |
-
marker[
|
| 457 |
priceChart.data.datasets[1].data = marker;
|
| 458 |
priceChart.update('none');
|
| 459 |
}
|
| 460 |
|
| 461 |
// ── Carbon curve ──
|
| 462 |
-
if (state.
|
| 463 |
-
carbonChart.data.labels = Array.from({ length:
|
| 464 |
-
carbonChart.data.datasets[0].data = state.
|
| 465 |
carbonChart.update('none');
|
| 466 |
}
|
| 467 |
|
|
@@ -530,16 +527,9 @@ async function fetchAndUpdate() {
|
|
| 530 |
stressChart.data.datasets[0].data = b.reward_history.map(r => Math.max(0, r.grid_response || 0));
|
| 531 |
stressChart.update('none');
|
| 532 |
|
| 533 |
-
//
|
| 534 |
-
|
| 535 |
-
rewardChart.data.
|
| 536 |
-
rewardChart.data.datasets[0].data = recent.map(r => Math.max(0, r.cost_savings || 0));
|
| 537 |
-
rewardChart.data.datasets[1].data = recent.map(r => Math.max(0, r.temp_constraint || 0));
|
| 538 |
-
rewardChart.data.datasets[2].data = recent.map(r => Math.max(0, r.grid_response || 0));
|
| 539 |
-
rewardChart.data.datasets[3].data = recent.map(r => Math.max(0, r.efficiency_bonus || 0));
|
| 540 |
-
rewardChart.data.datasets[4].data = recent.map(r =>
|
| 541 |
-
Math.abs(r.deadline_penalty || 0) + Math.abs(r.stability_penalty || 0)
|
| 542 |
-
);
|
| 543 |
rewardChart.update('none');
|
| 544 |
|
| 545 |
// Reward rows (last step)
|
|
@@ -580,6 +570,46 @@ async function doReset() {
|
|
| 580 |
document.getElementById('grade-result').textContent = '';
|
| 581 |
}
|
| 582 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 583 |
async function doGrade() {
|
| 584 |
try {
|
| 585 |
const res = await fetch(`${API_BASE}/grade`);
|
|
|
|
| 7 |
|
| 8 |
// ── Config ──────────────────────────────────────────────────────────────────
|
| 9 |
const POLL_MS = 500;
|
| 10 |
+
const HISTORY_LEN = 288; // 288 steps = full episode
|
| 11 |
const API_BASE = '/api';
|
| 12 |
const TASK_NAMES = {
|
| 13 |
1: 'Task 1 — Cost Minimization (Easy)',
|
|
|
|
| 95 |
}
|
| 96 |
|
| 97 |
// ── Initialise all charts ─────────────────────────────────────────────────────
|
| 98 |
+
const emptyLabels = Array.from({ length: 72 }, (_, i) => `${i}h`);
|
| 99 |
+
const emptyData = Array(72).fill(null);
|
| 100 |
|
| 101 |
// 1. Price curve
|
| 102 |
const priceChart = makeLineChart('chart-price',
|
|
|
|
| 258 |
{ yAxis: { title: { display: true, text: 'gCO₂/kWh' } } }
|
| 259 |
);
|
| 260 |
|
| 261 |
+
// 8. Reward timeline curve
|
| 262 |
+
const rewardChart = makeLineChart('chart-reward',
|
| 263 |
[],
|
| 264 |
[
|
| 265 |
+
{ label: 'Step Reward', data: [], borderColor: COLORS.green, backgroundColor: rgba(COLORS.green, 0.1), borderWidth: 2, fill: true, tension: 0.4, pointRadius: 0 },
|
| 266 |
+
],
|
| 267 |
+
{ yAxis: { title: { display: true, text: 'Reward' } } }
|
|
|
|
|
|
|
|
|
|
| 268 |
);
|
| 269 |
|
| 270 |
// ── Stress meter bars ────────────────────────────────────────────────────────
|
|
|
|
| 306 |
wrap.innerHTML = '<div style="color:var(--text-dim);font-size:0.8rem">No batch jobs in this episode.</div>';
|
| 307 |
return;
|
| 308 |
}
|
| 309 |
+
const totalSlots = 288;
|
| 310 |
wrap.innerHTML = '';
|
| 311 |
jobs.forEach(job => {
|
| 312 |
const row = document.createElement('div');
|
|
|
|
| 416 |
const hourOfDay = b.hour_of_day || 0;
|
| 417 |
|
| 418 |
// ── Header ──
|
| 419 |
+
document.getElementById('ep-step').textContent = `ep:${state.episode} step:${step}/287`;
|
| 420 |
document.getElementById('task-badge').textContent = TASK_NAMES[state.task_id] || 'Task 1';
|
| 421 |
|
| 422 |
// ── KPIs ──
|
|
|
|
| 444 |
document.getElementById('kpi-storage').textContent = `${(b.thermal_storage_level * 100).toFixed(1)}`;
|
| 445 |
|
| 446 |
// ── Price curve chart ──
|
| 447 |
+
if (state.price_curve_episode && state.price_curve_episode.length === 72) {
|
| 448 |
+
const labels = Array.from({ length: 72 }, (_, i) => `${i}:00`);
|
| 449 |
priceChart.data.labels = labels;
|
| 450 |
+
priceChart.data.datasets[0].data = state.price_curve_episode;
|
| 451 |
// Current position marker
|
| 452 |
+
const marker = Array(72).fill(null);
|
| 453 |
+
marker[Math.floor(step / 4)] = state.price_curve_episode[Math.floor(step / 4)];
|
| 454 |
priceChart.data.datasets[1].data = marker;
|
| 455 |
priceChart.update('none');
|
| 456 |
}
|
| 457 |
|
| 458 |
// ── Carbon curve ──
|
| 459 |
+
if (state.carbon_curve_episode && state.carbon_curve_episode.length === 72) {
|
| 460 |
+
carbonChart.data.labels = Array.from({ length: 72 }, (_, i) => `${i}:00`);
|
| 461 |
+
carbonChart.data.datasets[0].data = state.carbon_curve_episode;
|
| 462 |
carbonChart.update('none');
|
| 463 |
}
|
| 464 |
|
|
|
|
| 527 |
stressChart.data.datasets[0].data = b.reward_history.map(r => Math.max(0, r.grid_response || 0));
|
| 528 |
stressChart.update('none');
|
| 529 |
|
| 530 |
+
// Total reward timeline chart (full episode)
|
| 531 |
+
rewardChart.data.labels = Array.from({ length: n }, (_, i) => i);
|
| 532 |
+
rewardChart.data.datasets[0].data = b.reward_history.map(r => r.total || 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 533 |
rewardChart.update('none');
|
| 534 |
|
| 535 |
// Reward rows (last step)
|
|
|
|
| 570 |
document.getElementById('grade-result').textContent = '';
|
| 571 |
}
|
| 572 |
|
| 573 |
+
let liveSimTimer = null;
|
| 574 |
+
let isLiveSimulating = false;
|
| 575 |
+
|
| 576 |
+
function toggleLiveSim() {
|
| 577 |
+
const btn = document.getElementById('btn-live');
|
| 578 |
+
if (isLiveSimulating) {
|
| 579 |
+
// Stop live sim
|
| 580 |
+
clearInterval(liveSimTimer);
|
| 581 |
+
isLiveSimulating = false;
|
| 582 |
+
btn.textContent = '▶ Start Live Simulation';
|
| 583 |
+
btn.style.background = 'var(--accent-green)';
|
| 584 |
+
} else {
|
| 585 |
+
// Start live sim
|
| 586 |
+
isLiveSimulating = true;
|
| 587 |
+
btn.textContent = '⏸ Pause Live Simulation';
|
| 588 |
+
btn.style.background = 'var(--accent-amber)';
|
| 589 |
+
|
| 590 |
+
liveSimTimer = setInterval(async () => {
|
| 591 |
+
// Step the environment automatically with a fixed baseline action (constant values, no policy logic)
|
| 592 |
+
const taskId = parseInt(document.getElementById('task-select').value, 10);
|
| 593 |
+
try {
|
| 594 |
+
await fetch(`${API_BASE}/step`, {
|
| 595 |
+
method: 'POST',
|
| 596 |
+
headers: { 'Content-Type': 'application/json' },
|
| 597 |
+
body: JSON.stringify({
|
| 598 |
+
hvac_power_level: 0.5,
|
| 599 |
+
thermal_charge_rate: 0.0,
|
| 600 |
+
batch_job_slot: 0,
|
| 601 |
+
load_shed_fraction: 0.0,
|
| 602 |
+
building_id: currentBuilding
|
| 603 |
+
}),
|
| 604 |
+
});
|
| 605 |
+
// fetchAndUpdate() will catch the change via polling
|
| 606 |
+
} catch (e) {
|
| 607 |
+
console.error(e);
|
| 608 |
+
}
|
| 609 |
+
}, 400); // 400ms per step
|
| 610 |
+
}
|
| 611 |
+
}
|
| 612 |
+
|
| 613 |
async function doGrade() {
|
| 614 |
try {
|
| 615 |
const res = await fetch(`${API_BASE}/grade`);
|
dashboard/static/index.html
CHANGED
|
@@ -514,7 +514,7 @@
|
|
| 514 |
|
| 515 |
<!-- Row 1: Price curve + Temperature + Controls -->
|
| 516 |
<div class="card col-8">
|
| 517 |
-
<div class="card-title"><span class="icon">💰</span>
|
| 518 |
<div class="chart-wrap">
|
| 519 |
<canvas id="chart-price"></canvas>
|
| 520 |
</div>
|
|
@@ -587,7 +587,7 @@
|
|
| 587 |
</div>
|
| 588 |
|
| 589 |
<div class="card col-6">
|
| 590 |
-
<div class="card-title"><span class="icon">🌍</span> Carbon Intensity Curve (
|
| 591 |
<div class="chart-wrap">
|
| 592 |
<canvas id="chart-carbon"></canvas>
|
| 593 |
</div>
|
|
@@ -608,6 +608,7 @@
|
|
| 608 |
<option value="2">Building 3</option>
|
| 609 |
</select>
|
| 610 |
<button id="btn-reset" class="btn primary" onclick="doReset()">↺ New Episode</button>
|
|
|
|
| 611 |
<button class="btn" onclick="doGrade()">📋 Grade Episode</button>
|
| 612 |
<button class="btn" onclick="window.open('/api/replay')">📥 Export Replay</button>
|
| 613 |
<span id="grade-result" style="font-family:var(--font-mono);font-size:0.9rem;color:var(--accent-green)"></span>
|
|
|
|
| 514 |
|
| 515 |
<!-- Row 1: Price curve + Temperature + Controls -->
|
| 516 |
<div class="card col-8">
|
| 517 |
+
<div class="card-title"><span class="icon">💰</span> Electricity Price Curve (72h)</div>
|
| 518 |
<div class="chart-wrap">
|
| 519 |
<canvas id="chart-price"></canvas>
|
| 520 |
</div>
|
|
|
|
| 587 |
</div>
|
| 588 |
|
| 589 |
<div class="card col-6">
|
| 590 |
+
<div class="card-title"><span class="icon">🌍</span> Carbon Intensity Curve (72h)</div>
|
| 591 |
<div class="chart-wrap">
|
| 592 |
<canvas id="chart-carbon"></canvas>
|
| 593 |
</div>
|
|
|
|
| 608 |
<option value="2">Building 3</option>
|
| 609 |
</select>
|
| 610 |
<button id="btn-reset" class="btn primary" onclick="doReset()">↺ New Episode</button>
|
| 611 |
+
<button id="btn-live" class="btn" style="background:var(--accent-green);color:#fff;border:none;" onclick="toggleLiveSim()">▶ Start Live Simulation</button>
|
| 612 |
<button class="btn" onclick="doGrade()">📋 Grade Episode</button>
|
| 613 |
<button class="btn" onclick="window.open('/api/replay')">📥 Export Replay</button>
|
| 614 |
<span id="grade-result" style="font-family:var(--font-mono);font-size:0.9rem;color:var(--accent-green)"></span>
|
env/environment.go
CHANGED
|
@@ -11,7 +11,7 @@ import (
|
|
| 11 |
)
|
| 12 |
|
| 13 |
const (
|
| 14 |
-
EpisodeSteps =
|
| 15 |
StepDurationHrs = 0.25 // each step = 15 minutes = 0.25 h
|
| 16 |
MaxBuildings = 3
|
| 17 |
DefaultSetpoint = 21.0 // °C comfortable indoor temp
|
|
@@ -219,9 +219,9 @@ func (e *Environment) GetState() StateResponse {
|
|
| 219 |
buildings[i] = pub
|
| 220 |
}
|
| 221 |
|
| 222 |
-
priceCurve := make([]float64,
|
| 223 |
-
carbonCurve := make([]float64,
|
| 224 |
-
for h := 0; h <
|
| 225 |
stepIdx := h * 4
|
| 226 |
if stepIdx < EpisodeSteps {
|
| 227 |
priceCurve[h] = e.PriceCurve[stepIdx]
|
|
|
|
| 11 |
)
|
| 12 |
|
| 13 |
const (
|
| 14 |
+
EpisodeSteps = 288 // 72 hours × 15-min intervals
|
| 15 |
StepDurationHrs = 0.25 // each step = 15 minutes = 0.25 h
|
| 16 |
MaxBuildings = 3
|
| 17 |
DefaultSetpoint = 21.0 // °C comfortable indoor temp
|
|
|
|
| 219 |
buildings[i] = pub
|
| 220 |
}
|
| 221 |
|
| 222 |
+
priceCurve := make([]float64, EpisodeSteps/4)
|
| 223 |
+
carbonCurve := make([]float64, EpisodeSteps/4)
|
| 224 |
+
for h := 0; h < EpisodeSteps/4; h++ {
|
| 225 |
stepIdx := h * 4
|
| 226 |
if stepIdx < EpisodeSteps {
|
| 227 |
priceCurve[h] = e.PriceCurve[stepIdx]
|
env/models.go
CHANGED
|
@@ -124,8 +124,8 @@ type ResetResponse struct {
|
|
| 124 |
// StateResponse is returned from GET /state.
|
| 125 |
type StateResponse struct {
|
| 126 |
Buildings []BuildingStatePublic `json:"buildings"`
|
| 127 |
-
PriceCurve []float64 `json:"
|
| 128 |
-
CarbonCurve []float64 `json:"
|
| 129 |
Episode int `json:"episode"`
|
| 130 |
Step int `json:"step"`
|
| 131 |
TaskID int `json:"task_id"`
|
|
|
|
| 124 |
// StateResponse is returned from GET /state.
|
| 125 |
type StateResponse struct {
|
| 126 |
Buildings []BuildingStatePublic `json:"buildings"`
|
| 127 |
+
PriceCurve []float64 `json:"price_curve_episode"` // full episode ToU prices
|
| 128 |
+
CarbonCurve []float64 `json:"carbon_curve_episode"` // full episode carbon intensities
|
| 129 |
Episode int `json:"episode"`
|
| 130 |
Step int `json:"step"`
|
| 131 |
TaskID int `json:"task_id"`
|
python/inference.py
CHANGED
|
@@ -239,7 +239,9 @@ def run_episode(env_client: GridMindEnvClient, agent: LLMAgent,
|
|
| 239 |
total_steps = 0
|
| 240 |
start_time = time.time()
|
| 241 |
|
| 242 |
-
|
|
|
|
|
|
|
| 243 |
action = agent.choose_action(obs, task_id)
|
| 244 |
step_resp = env_client.step(action)
|
| 245 |
|
|
@@ -253,9 +255,7 @@ def run_episode(env_client: GridMindEnvClient, agent: LLMAgent,
|
|
| 253 |
f"stress={obs['grid_stress_signal']:.2f} "
|
| 254 |
f"cost=${obs['cumulative_cost']:.2f} "
|
| 255 |
f"reward={step_resp['reward']:.3f}")
|
| 256 |
-
|
| 257 |
-
if step_resp.get("done", False):
|
| 258 |
-
break
|
| 259 |
|
| 260 |
elapsed = time.time() - start_time
|
| 261 |
grade = env_client.grade()
|
|
|
|
| 239 |
total_steps = 0
|
| 240 |
start_time = time.time()
|
| 241 |
|
| 242 |
+
step_resp = {}
|
| 243 |
+
_step = 0
|
| 244 |
+
while not step_resp.get("done", False):
|
| 245 |
action = agent.choose_action(obs, task_id)
|
| 246 |
step_resp = env_client.step(action)
|
| 247 |
|
|
|
|
| 255 |
f"stress={obs['grid_stress_signal']:.2f} "
|
| 256 |
f"cost=${obs['cumulative_cost']:.2f} "
|
| 257 |
f"reward={step_resp['reward']:.3f}")
|
| 258 |
+
_step += 1
|
|
|
|
|
|
|
| 259 |
|
| 260 |
elapsed = time.time() - start_time
|
| 261 |
grade = env_client.grade()
|
python/models.py
CHANGED
|
@@ -29,7 +29,7 @@ class ObservationModel(BaseModel):
|
|
| 29 |
hour_of_day: int = Field(..., ge=0, le=23, description="Current hour of day (0–23)")
|
| 30 |
batch_queue: List[int] = Field(default_factory=list, description="Deadline slots of pending batch jobs")
|
| 31 |
cumulative_cost: float = Field(..., ge=0.0, description="Running energy cost this episode ($)")
|
| 32 |
-
step: int = Field(..., ge=0, description="Current timestep (0–
|
| 33 |
building_id: int = Field(default=0, description="Building index in federation")
|
| 34 |
|
| 35 |
|
|
@@ -137,8 +137,8 @@ class BuildingStatePublic(BaseModel):
|
|
| 137 |
class StateResponse(BaseModel):
|
| 138 |
"""Full environment state from GET /state."""
|
| 139 |
buildings: List[BuildingStatePublic]
|
| 140 |
-
|
| 141 |
-
|
| 142 |
episode: int
|
| 143 |
step: int
|
| 144 |
task_id: int
|
|
|
|
| 29 |
hour_of_day: int = Field(..., ge=0, le=23, description="Current hour of day (0–23)")
|
| 30 |
batch_queue: List[int] = Field(default_factory=list, description="Deadline slots of pending batch jobs")
|
| 31 |
cumulative_cost: float = Field(..., ge=0.0, description="Running energy cost this episode ($)")
|
| 32 |
+
step: int = Field(..., ge=0, description="Current timestep (0–287)")
|
| 33 |
building_id: int = Field(default=0, description="Building index in federation")
|
| 34 |
|
| 35 |
|
|
|
|
| 137 |
class StateResponse(BaseModel):
|
| 138 |
"""Full environment state from GET /state."""
|
| 139 |
buildings: List[BuildingStatePublic]
|
| 140 |
+
price_curve_episode: List[float]
|
| 141 |
+
carbon_curve_episode: List[float]
|
| 142 |
episode: int
|
| 143 |
step: int
|
| 144 |
task_id: int
|
python/validate.py
CHANGED
|
@@ -168,8 +168,11 @@ def validate(env_url: str) -> bool:
|
|
| 168 |
post(f"{base}/reset", {"task_id": 1, "seed": 777})
|
| 169 |
action = {"hvac_power_level": 0.3, "thermal_charge_rate": 0.0,
|
| 170 |
"batch_job_slot": 0, "load_shed_fraction": 0.0}
|
| 171 |
-
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
| 173 |
r = get(f"{base}/grade")
|
| 174 |
results.append(check("GET /grade returns 200", r.status_code == 200))
|
| 175 |
grade = r.json()
|
|
@@ -217,9 +220,12 @@ def validate(env_url: str) -> bool:
|
|
| 217 |
# Two different policies
|
| 218 |
for a in [0.1, 0.9]:
|
| 219 |
post(f"{base}/reset", {"task_id": 1, "seed": seed})
|
| 220 |
-
|
| 221 |
-
|
|
|
|
| 222 |
"batch_job_slot": 0, "load_shed_fraction": 0})
|
|
|
|
|
|
|
| 223 |
g = requests.get(f"{base}/grade", timeout=10).json()
|
| 224 |
sc = g.get("score", 0)
|
| 225 |
scores_nonzero.append(sc > 0.01)
|
|
|
|
| 168 |
post(f"{base}/reset", {"task_id": 1, "seed": 777})
|
| 169 |
action = {"hvac_power_level": 0.3, "thermal_charge_rate": 0.0,
|
| 170 |
"batch_job_slot": 0, "load_shed_fraction": 0.0}
|
| 171 |
+
done = False
|
| 172 |
+
while not done:
|
| 173 |
+
r2 = post(f"{base}/step", action)
|
| 174 |
+
if r2.json().get("done"):
|
| 175 |
+
done = True
|
| 176 |
r = get(f"{base}/grade")
|
| 177 |
results.append(check("GET /grade returns 200", r.status_code == 200))
|
| 178 |
grade = r.json()
|
|
|
|
| 220 |
# Two different policies
|
| 221 |
for a in [0.1, 0.9]:
|
| 222 |
post(f"{base}/reset", {"task_id": 1, "seed": seed})
|
| 223 |
+
done = False
|
| 224 |
+
while not done:
|
| 225 |
+
r2 = post(f"{base}/step", {"hvac_power_level": a, "thermal_charge_rate": 0,
|
| 226 |
"batch_job_slot": 0, "load_shed_fraction": 0})
|
| 227 |
+
if r2.json().get("done"):
|
| 228 |
+
done = True
|
| 229 |
g = requests.get(f"{base}/grade", timeout=10).json()
|
| 230 |
sc = g.get("score", 0)
|
| 231 |
scores_nonzero.append(sc > 0.01)
|
tests/environment_test.go
CHANGED
|
@@ -57,19 +57,19 @@ func TestStepAdvancesState(t *testing.T) {
|
|
| 57 |
}
|
| 58 |
}
|
| 59 |
|
| 60 |
-
//
|
| 61 |
-
func
|
| 62 |
e := env.NewEnvironment()
|
| 63 |
var seed int64 = 99
|
| 64 |
e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
|
| 65 |
|
| 66 |
action := []env.ActionModel{{HVACPowerLevel: 0.5}}
|
| 67 |
var lastDone bool
|
| 68 |
-
for i := 0; i <
|
| 69 |
_, lastDone = e.Step(action)
|
| 70 |
}
|
| 71 |
if !lastDone {
|
| 72 |
-
t.Errorf("episode should be done after
|
| 73 |
}
|
| 74 |
}
|
| 75 |
|
|
@@ -162,7 +162,7 @@ func TestGraderTask1ScoreRange(t *testing.T) {
|
|
| 162 |
e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})
|
| 163 |
|
| 164 |
action := []env.ActionModel{{HVACPowerLevel: 0.3}}
|
| 165 |
-
for i := 0; i <
|
| 166 |
e.Step(action)
|
| 167 |
}
|
| 168 |
|
|
|
|
| 57 |
}
|
| 58 |
}
|
| 59 |
|
| 60 |
+
// TestEpisodeLengthIs288 verifies the episode terminates at step 288.
|
| 61 |
+
func TestEpisodeLengthIs288(t *testing.T) {
|
| 62 |
e := env.NewEnvironment()
|
| 63 |
var seed int64 = 99
|
| 64 |
e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1, NumBuildings: 1})
|
| 65 |
|
| 66 |
action := []env.ActionModel{{HVACPowerLevel: 0.5}}
|
| 67 |
var lastDone bool
|
| 68 |
+
for i := 0; i < 288; i++ {
|
| 69 |
_, lastDone = e.Step(action)
|
| 70 |
}
|
| 71 |
if !lastDone {
|
| 72 |
+
t.Errorf("episode should be done after 288 steps")
|
| 73 |
}
|
| 74 |
}
|
| 75 |
|
|
|
|
| 162 |
e.Reset(env.ResetRequest{Seed: &seed, TaskID: 1})
|
| 163 |
|
| 164 |
action := []env.ActionModel{{HVACPowerLevel: 0.3}}
|
| 165 |
+
for i := 0; i < 288; i++ {
|
| 166 |
e.Step(action)
|
| 167 |
}
|
| 168 |
|
tests/test_graders.py
CHANGED
|
@@ -51,10 +51,11 @@ def grade() -> dict:
|
|
| 51 |
def run_full_episode(task_id: int, seed: int, hvac: float = 0.5) -> dict:
|
| 52 |
reset(task_id=task_id, seed=seed)
|
| 53 |
action = {"hvac_power_level": hvac, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0}
|
| 54 |
-
|
|
|
|
| 55 |
resp = step(action)
|
| 56 |
if resp.get("done"):
|
| 57 |
-
|
| 58 |
return grade()
|
| 59 |
|
| 60 |
|
|
@@ -86,8 +87,11 @@ class TestTask1:
|
|
| 86 |
"""Always shedding 50% should be detected and penalized."""
|
| 87 |
reset(task_id=1, seed=10)
|
| 88 |
action = {"hvac_power_level": 0.5, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0.5}
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
| 91 |
g = grade()
|
| 92 |
# Score should be reduced OR exploit flagged
|
| 93 |
assert g["exploit_detected"] or g["score"] < 0.9
|
|
@@ -165,9 +169,10 @@ class TestMultiBuilding:
|
|
| 165 |
{"hvac_power_level": 0.4, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0, "building_id": 0},
|
| 166 |
{"hvac_power_level": 0.6, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0, "building_id": 1},
|
| 167 |
]
|
| 168 |
-
|
|
|
|
| 169 |
r = requests.post(f"{BASE}/step", json=action)
|
| 170 |
if r.json()[0].get("done"):
|
| 171 |
-
|
| 172 |
g = grade()
|
| 173 |
assert 0.0 <= g["score"] <= 1.0
|
|
|
|
| 51 |
def run_full_episode(task_id: int, seed: int, hvac: float = 0.5) -> dict:
|
| 52 |
reset(task_id=task_id, seed=seed)
|
| 53 |
action = {"hvac_power_level": hvac, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0}
|
| 54 |
+
done = False
|
| 55 |
+
while not done:
|
| 56 |
resp = step(action)
|
| 57 |
if resp.get("done"):
|
| 58 |
+
done = True
|
| 59 |
return grade()
|
| 60 |
|
| 61 |
|
|
|
|
| 87 |
"""Always shedding 50% should be detected and penalized."""
|
| 88 |
reset(task_id=1, seed=10)
|
| 89 |
action = {"hvac_power_level": 0.5, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0.5}
|
| 90 |
+
done = False
|
| 91 |
+
while not done:
|
| 92 |
+
resp = step(action)
|
| 93 |
+
if resp.get("done"):
|
| 94 |
+
done = True
|
| 95 |
g = grade()
|
| 96 |
# Score should be reduced OR exploit flagged
|
| 97 |
assert g["exploit_detected"] or g["score"] < 0.9
|
|
|
|
| 169 |
{"hvac_power_level": 0.4, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0, "building_id": 0},
|
| 170 |
{"hvac_power_level": 0.6, "thermal_charge_rate": 0, "batch_job_slot": 0, "load_shed_fraction": 0, "building_id": 1},
|
| 171 |
]
|
| 172 |
+
done = False
|
| 173 |
+
while not done:
|
| 174 |
r = requests.post(f"{BASE}/step", json=action)
|
| 175 |
if r.json()[0].get("done"):
|
| 176 |
+
done = True
|
| 177 |
g = grade()
|
| 178 |
assert 0.0 <= g["score"] <= 1.0
|