Spaces:
Sleeping
Sleeping
refactor: replace heuristic log generation with Go-based environment simulation and update API schema
Browse files
- .gitignore +2 -1
- baseline_scores.json +38 -5
- env/environment.go +31 -0
- env/models.go +20 -11
- generate_realistic_training_log.py +0 -33
- generate_training_log.py +0 -52
- inference.py +1 -1
- main.go +30 -6
- openenv.yaml +23 -3
.gitignore
CHANGED
|
@@ -77,4 +77,5 @@ wandb/
|
|
| 77 |
IMPLEMENTATION_PLAN.md
|
| 78 |
TODO_SPRINT.md
|
| 79 |
context.md
|
| 80 |
-
REPORT.md
|
|
|
|
|
|
| 77 |
IMPLEMENTATION_PLAN.md
|
| 78 |
TODO_SPRINT.md
|
| 79 |
context.md
|
| 80 |
+
REPORT.mdgridmind.exe
|
| 81 |
+
gridmind-server.exe
|
baseline_scores.json
CHANGED
|
@@ -7,17 +7,50 @@
|
|
| 7 |
"llm_every": 8,
|
| 8 |
"max_steps": null,
|
| 9 |
"task_averages": {
|
| 10 |
-
"1": 0.
|
|
|
|
|
|
|
|
|
|
| 11 |
},
|
| 12 |
-
"overall_average": 0.
|
| 13 |
"all_results": [
|
| 14 |
{
|
| 15 |
"task_id": 1,
|
| 16 |
"seed": 1100,
|
| 17 |
-
"total_reward":
|
| 18 |
"total_steps": 96,
|
| 19 |
-
"elapsed_sec": 1.
|
| 20 |
-
"score": 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
"sub_scores": {},
|
| 22 |
"exploit_detected": false
|
| 23 |
}
|
|
|
|
| 7 |
"llm_every": 8,
|
| 8 |
"max_steps": null,
|
| 9 |
"task_averages": {
|
| 10 |
+
"1": 0.4942,
|
| 11 |
+
"2": 0.4707,
|
| 12 |
+
"3": 0.7478,
|
| 13 |
+
"4": 0.4779
|
| 14 |
},
|
| 15 |
+
"overall_average": 0.54765,
|
| 16 |
"all_results": [
|
| 17 |
{
|
| 18 |
"task_id": 1,
|
| 19 |
"seed": 1100,
|
| 20 |
+
"total_reward": 251.84571448658104,
|
| 21 |
"total_steps": 96,
|
| 22 |
+
"elapsed_sec": 1.227782964706421,
|
| 23 |
+
"score": 0.4942,
|
| 24 |
+
"sub_scores": {},
|
| 25 |
+
"exploit_detected": false
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"task_id": 2,
|
| 29 |
+
"seed": 1200,
|
| 30 |
+
"total_reward": 245.38403598363988,
|
| 31 |
+
"total_steps": 96,
|
| 32 |
+
"elapsed_sec": 0.8327796459197998,
|
| 33 |
+
"score": 0.4707,
|
| 34 |
+
"sub_scores": {},
|
| 35 |
+
"exploit_detected": false
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"task_id": 3,
|
| 39 |
+
"seed": 1300,
|
| 40 |
+
"total_reward": 242.06080137356216,
|
| 41 |
+
"total_steps": 96,
|
| 42 |
+
"elapsed_sec": 0.6833479404449463,
|
| 43 |
+
"score": 0.7478,
|
| 44 |
+
"sub_scores": {},
|
| 45 |
+
"exploit_detected": false
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"task_id": 4,
|
| 49 |
+
"seed": 1400,
|
| 50 |
+
"total_reward": 206.4647897455665,
|
| 51 |
+
"total_steps": 96,
|
| 52 |
+
"elapsed_sec": 1.0237984657287598,
|
| 53 |
+
"score": 0.4779,
|
| 54 |
"sub_scores": {},
|
| 55 |
"exploit_detected": false
|
| 56 |
}
|
env/environment.go
CHANGED
|
@@ -587,6 +587,7 @@ func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) S
|
|
| 587 |
Episode: e.episode,
|
| 588 |
Step: s,
|
| 589 |
},
|
|
|
|
| 590 |
}
|
| 591 |
}
|
| 592 |
|
|
@@ -692,6 +693,29 @@ func (e *Environment) buildObservation(b *BuildingState) ObservationModel {
|
|
| 692 |
// Apply sensor fault noise to observation (not physics) - if sensor fault is active, agent sees wrong temp
|
| 693 |
reportedTemp := b.IndoorTemperature + b.TempObservationNoise
|
| 694 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
return ObservationModel{
|
| 696 |
IndoorTemperature: math.Round(reportedTemp*100) / 100,
|
| 697 |
ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000,
|
|
@@ -707,6 +731,13 @@ func (e *Environment) buildObservation(b *BuildingState) ObservationModel {
|
|
| 707 |
HVACEfficiency: math.Round(b.HVACEfficiency*1000) / 1000,
|
| 708 |
InstructionCard: e.InstructionCard,
|
| 709 |
ActiveFaults: activeFaults,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 710 |
}
|
| 711 |
}
|
| 712 |
|
|
|
|
| 587 |
Episode: e.episode,
|
| 588 |
Step: s,
|
| 589 |
},
|
| 590 |
+
Rewards: rc,
|
| 591 |
}
|
| 592 |
}
|
| 593 |
|
|
|
|
| 693 |
// Apply sensor fault noise to observation (not physics) - if sensor fault is active, agent sees wrong temp
|
| 694 |
reportedTemp := b.IndoorTemperature + b.TempObservationNoise
|
| 695 |
|
| 696 |
+
taskCardStr := ""
|
| 697 |
+
if e.taskID == 4 && e.InstructionCard != nil {
|
| 698 |
+
taskCardStr = e.InstructionCard.Text
|
| 699 |
+
} else if e.taskID == 1 {
|
| 700 |
+
taskCardStr = "Task 1 (Easy - Cost Minimization): Minimize total energy cost over 24 hours. No temperature or batch constraints. Use cheap off-peak periods and thermal storage."
|
| 701 |
+
} else if e.taskID == 2 {
|
| 702 |
+
taskCardStr = "Task 2 (Medium - Temperature Management): Minimize cost AND keep indoor temperature within 19-23°C at all times. Balance comfort vs cost."
|
| 703 |
+
} else if e.taskID == 3 {
|
| 704 |
+
taskCardStr = "Task 3 (Hard - Full Demand Response): Minimize cost, maintain temperature, respond to grid stress (shed when grid_stress_signal > 0.7), schedule batch jobs, minimize carbon."
|
| 705 |
+
} else {
|
| 706 |
+
taskCardStr = "Maintain operations and minimize cost."
|
| 707 |
+
}
|
| 708 |
+
|
| 709 |
+
priceForecast := make([]float64, 4)
|
| 710 |
+
for i := 0; i < 4; i++ {
|
| 711 |
+
idx := b.Step + i
|
| 712 |
+
if idx < EpisodeSteps {
|
| 713 |
+
priceForecast[i] = math.Round(e.PriceCurve[idx]*10000) / 10000
|
| 714 |
+
} else {
|
| 715 |
+
priceForecast[i] = math.Round(e.PriceCurve[EpisodeSteps-1]*10000) / 10000
|
| 716 |
+
}
|
| 717 |
+
}
|
| 718 |
+
|
| 719 |
return ObservationModel{
|
| 720 |
IndoorTemperature: math.Round(reportedTemp*100) / 100,
|
| 721 |
ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000,
|
|
|
|
| 731 |
HVACEfficiency: math.Round(b.HVACEfficiency*1000) / 1000,
|
| 732 |
InstructionCard: e.InstructionCard,
|
| 733 |
ActiveFaults: activeFaults,
|
| 734 |
+
TaskCard: taskCardStr,
|
| 735 |
+
NLSummary: "GridMind simulation state.",
|
| 736 |
+
MarketType: "tou",
|
| 737 |
+
Season: "summer",
|
| 738 |
+
PriceVolatility: 0.2,
|
| 739 |
+
PriceForecast: priceForecast,
|
| 740 |
+
DemandChargeActive: false,
|
| 741 |
}
|
| 742 |
}
|
| 743 |
|
env/models.go
CHANGED
|
@@ -76,6 +76,13 @@ type ObservationModel struct {
|
|
| 76 |
HVACEfficiency float64 `json:"hvac_efficiency"`
|
| 77 |
InstructionCard *InstructionCard `json:"instruction_card,omitempty"` // populated for Task 4 only
|
| 78 |
ActiveFaults []string `json:"active_faults,omitempty"` // human-readable alarm strings for active faults
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
}
|
| 80 |
|
| 81 |
// ActionModel is the parsed agent action for a single step.
|
|
@@ -87,18 +94,19 @@ type ActionModel struct {
|
|
| 87 |
BuildingID int `json:"building_id"` // which building to act on
|
| 88 |
}
|
| 89 |
|
| 90 |
-
// RewardComponents holds the individual components of the dense reward signal.
|
| 91 |
type RewardComponents struct {
|
| 92 |
-
CostSavings
|
| 93 |
-
TempConstraint
|
| 94 |
-
GridResponse
|
| 95 |
-
DeadlinePenalty
|
| 96 |
-
EfficiencyBonus
|
| 97 |
-
StabilityPenalty
|
| 98 |
-
CarbonReward
|
| 99 |
-
InstructionReward
|
| 100 |
-
FaultMitigation
|
| 101 |
-
|
|
|
|
|
|
|
| 102 |
}
|
| 103 |
|
| 104 |
// StepResponse is the full HTTP body returned from POST /step.
|
|
@@ -107,6 +115,7 @@ type StepResponse struct {
|
|
| 107 |
Reward float64 `json:"reward"`
|
| 108 |
Done bool `json:"done"`
|
| 109 |
Info StepInfo `json:"info"`
|
|
|
|
| 110 |
}
|
| 111 |
|
| 112 |
// StepInfo carries auxiliary information per step.
|
|
|
|
| 76 |
HVACEfficiency float64 `json:"hvac_efficiency"`
|
| 77 |
InstructionCard *InstructionCard `json:"instruction_card,omitempty"` // populated for Task 4 only
|
| 78 |
ActiveFaults []string `json:"active_faults,omitempty"` // human-readable alarm strings for active faults
|
| 79 |
+
TaskCard string `json:"task_card"`
|
| 80 |
+
NLSummary string `json:"nl_summary"`
|
| 81 |
+
MarketType string `json:"market_type"`
|
| 82 |
+
Season string `json:"season"`
|
| 83 |
+
PriceVolatility float64 `json:"price_volatility"`
|
| 84 |
+
PriceForecast []float64 `json:"price_forecast"`
|
| 85 |
+
DemandChargeActive bool `json:"demand_charge_active"`
|
| 86 |
}
|
| 87 |
|
| 88 |
// ActionModel is the parsed agent action for a single step.
|
|
|
|
| 94 |
BuildingID int `json:"building_id"` // which building to act on
|
| 95 |
}
|
| 96 |
|
|
|
|
| 97 |
type RewardComponents struct {
|
| 98 |
+
CostSavings float64 `json:"cost_savings"`
|
| 99 |
+
TempConstraint float64 `json:"temperature_constraint"`
|
| 100 |
+
GridResponse float64 `json:"grid_response"`
|
| 101 |
+
DeadlinePenalty float64 `json:"deadline_penalty"`
|
| 102 |
+
EfficiencyBonus float64 `json:"efficiency_bonus"`
|
| 103 |
+
StabilityPenalty float64 `json:"stability_penalty"`
|
| 104 |
+
CarbonReward float64 `json:"carbon_reward"`
|
| 105 |
+
InstructionReward float64 `json:"task_satisfaction"`
|
| 106 |
+
FaultMitigation float64 `json:"fault_mitigation"`
|
| 107 |
+
PriceAnticipation float64 `json:"price_anticipation"`
|
| 108 |
+
DemandChargePenalty float64 `json:"demand_charge_penalty"`
|
| 109 |
+
Total float64 `json:"total"`
|
| 110 |
}
|
| 111 |
|
| 112 |
// StepResponse is the full HTTP body returned from POST /step.
|
|
|
|
| 115 |
Reward float64 `json:"reward"`
|
| 116 |
Done bool `json:"done"`
|
| 117 |
Info StepInfo `json:"info"`
|
| 118 |
+
Rewards RewardComponents `json:"rewards"`
|
| 119 |
}
|
| 120 |
|
| 121 |
// StepInfo carries auxiliary information per step.
|
generate_realistic_training_log.py
DELETED
|
@@ -1,33 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
import csv, random, math, os
|
| 3 |
-
|
| 4 |
-
random.seed(42)
|
| 5 |
-
os.makedirs("results", exist_ok=True)
|
| 6 |
-
|
| 7 |
-
rows = []
|
| 8 |
-
for step in range(0, 301, 5):
|
| 9 |
-
progress = step / 300
|
| 10 |
-
base = 0.52 + (0.68 - 0.52) * (1 - math.exp(-3 * progress)) + random.gauss(0, 0.015)
|
| 11 |
-
json_valid = min(0.2, 0.15 + random.gauss(0, 0.03))
|
| 12 |
-
rows.append({
|
| 13 |
-
"step": step,
|
| 14 |
-
"loss": max(0.000001, 0.00002 - progress * 0.00001 + random.gauss(0, 0.000005)),
|
| 15 |
-
"rewards/reward_json_valid/mean": max(0, min(0.2, json_valid)),
|
| 16 |
-
"rewards/reward_json_valid/std": 0.02,
|
| 17 |
-
"rewards/reward_env_interaction/mean": max(0.4, min(0.75, base)),
|
| 18 |
-
"rewards/reward_env_interaction/std": 0.02,
|
| 19 |
-
"rewards/reward/mean": 0.20 + json_valid + max(0.4, min(0.75, base)) * 0.4,
|
| 20 |
-
})
|
| 21 |
-
|
| 22 |
-
columns = ["step", "loss", "rewards/reward_json_valid/mean", "rewards/reward_json_valid/std",
|
| 23 |
-
"rewards/reward_env_interaction/mean", "rewards/reward_env_interaction/std", "rewards/reward/mean"]
|
| 24 |
-
|
| 25 |
-
with open("results/training_log.csv", "w", newline="") as f:
|
| 26 |
-
writer = csv.DictWriter(f, fieldnames=columns)
|
| 27 |
-
writer.writeheader()
|
| 28 |
-
writer.writerows(rows)
|
| 29 |
-
|
| 30 |
-
print(f"Generated {len(rows)} training steps with realistic learning curve")
|
| 31 |
-
print(f"Initial episode score: {rows[0]['rewards/reward_env_interaction/mean']:.3f}")
|
| 32 |
-
print(f"Final episode score: {rows[-1]['rewards/reward_env_interaction/mean']:.3f}")
|
| 33 |
-
print(f"Improvement: {(rows[-1]['rewards/reward_env_interaction/mean'] - rows[0]['rewards/reward_env_interaction/mean']):.3f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
generate_training_log.py
DELETED
|
@@ -1,52 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
GridMind-RL Training Log Generator
|
| 4 |
-
Generates a realistic training log CSV from heuristic baseline runs.
|
| 5 |
-
"""
|
| 6 |
-
import csv
|
| 7 |
-
import os
|
| 8 |
-
import json
|
| 9 |
-
import random
|
| 10 |
-
import math
|
| 11 |
-
|
| 12 |
-
random.seed(42)
|
| 13 |
-
|
| 14 |
-
os.makedirs("results", exist_ok=True)
|
| 15 |
-
|
| 16 |
-
with open("results/baseline_scores_heuristic.json") as f:
|
| 17 |
-
heuristic_data = json.load(f)
|
| 18 |
-
|
| 19 |
-
heuristic_by_task = {int(k): v for k, v in heuristic_data["task_averages"].items()}
|
| 20 |
-
overall_heuristic = heuristic_data["overall_average"]
|
| 21 |
-
llm_baseline = 0.65
|
| 22 |
-
target_performance = 0.72
|
| 23 |
-
|
| 24 |
-
N_STEPS = 200
|
| 25 |
-
NOISE_SCALE = 0.02
|
| 26 |
-
IMPROVEMENT_RATE = 0.003
|
| 27 |
-
|
| 28 |
-
rows = []
|
| 29 |
-
for step in range(0, N_STEPS + 1, 5):
|
| 30 |
-
progress = step / N_STEPS
|
| 31 |
-
base = overall_heuristic + (target_performance - overall_heuristic) * math.sin(progress * math.pi / 2)
|
| 32 |
-
loss = 2.0 - progress * 1.5 + random.gauss(0, 0.1)
|
| 33 |
-
reward_valid = 0.3 + random.gauss(0, 0.02)
|
| 34 |
-
reward_keys = 0.3 + random.gauss(0, 0.02)
|
| 35 |
-
reward_env = base * 0.4 + random.gauss(0, NOISE_SCALE)
|
| 36 |
-
rows.append({
|
| 37 |
-
"step": step,
|
| 38 |
-
"loss": max(0.1, loss),
|
| 39 |
-
"reward_valid_json": reward_valid,
|
| 40 |
-
"reward_has_required_keys": reward_keys,
|
| 41 |
-
"reward_env_interaction": max(0.0, min(0.4, reward_env)),
|
| 42 |
-
})
|
| 43 |
-
|
| 44 |
-
with open("results/training_log.csv", "w", newline="") as f:
|
| 45 |
-
writer = csv.DictWriter(f, fieldnames=["step", "loss", "reward_valid_json", "reward_has_required_keys", "reward_env_interaction"])
|
| 46 |
-
writer.writeheader()
|
| 47 |
-
writer.writerows(rows)
|
| 48 |
-
|
| 49 |
-
print(f"Generated {len(rows)} training steps -> results/training_log.csv")
|
| 50 |
-
print(f"Heuristic baseline: {overall_heuristic:.3f}")
|
| 51 |
-
print(f"Target performance: {target_performance:.3f}")
|
| 52 |
-
print(f"Final reward_env: {rows[-1]['reward_env_interaction']:.3f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inference.py
CHANGED
|
@@ -39,7 +39,7 @@ except ImportError:
|
|
| 39 |
ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
|
| 40 |
HF_TOKEN = os.getenv("HF_TOKEN") # Mandatory — no default
|
| 41 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://api-inference.huggingface.co/v1")
|
| 42 |
-
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-
|
| 43 |
|
| 44 |
# ── Constants ────────────────────────────────────────────────────────────────
|
| 45 |
BENCHMARK = "gridmind"
|
|
|
|
| 39 |
ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
|
| 40 |
HF_TOKEN = os.getenv("HF_TOKEN") # Mandatory — no default
|
| 41 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://api-inference.huggingface.co/v1")
|
| 42 |
+
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-1.5B-Instruct")
|
| 43 |
|
| 44 |
# ── Constants ────────────────────────────────────────────────────────────────
|
| 45 |
BENCHMARK = "gridmind"
|
main.go
CHANGED
|
@@ -240,15 +240,39 @@ func (s *Server) handleReset(w http.ResponseWriter, r *http.Request) {
|
|
| 240 |
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
| 241 |
return
|
| 242 |
}
|
| 243 |
-
var req env.ResetRequest
|
| 244 |
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
| 245 |
-
|
| 246 |
-
req = env.ResetRequest{TaskID: 1}
|
| 247 |
}
|
| 248 |
-
|
| 249 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
}
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
w.Header().Set("Content-Type", "application/json")
|
| 253 |
json.NewEncoder(w).Encode(resp)
|
| 254 |
}
|
|
|
|
| 240 |
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
| 241 |
return
|
| 242 |
}
|
| 243 |
+
var req map[string]interface{}
|
| 244 |
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
| 245 |
+
req = make(map[string]interface{})
|
|
|
|
| 246 |
}
|
| 247 |
+
taskID := 1
|
| 248 |
+
if t, ok := req["task_id"].(float64); ok {
|
| 249 |
+
taskID = int(t)
|
| 250 |
+
} else if t, ok := req["task_id"].(int64); ok {
|
| 251 |
+
taskID = int(t)
|
| 252 |
+
} else if t, ok := req["task_id"].(int); ok {
|
| 253 |
+
taskID = t
|
| 254 |
}
|
| 255 |
+
if taskID == 0 {
|
| 256 |
+
taskID = 1
|
| 257 |
+
}
|
| 258 |
+
seed := int64(0)
|
| 259 |
+
if s, ok := req["seed"].(float64); ok {
|
| 260 |
+
seed = int64(s)
|
| 261 |
+
} else if s, ok := req["seed"].(int64); ok {
|
| 262 |
+
seed = s
|
| 263 |
+
}
|
| 264 |
+
numBuildings := 1
|
| 265 |
+
if nb, ok := req["num_buildings"].(float64); ok {
|
| 266 |
+
numBuildings = int(nb)
|
| 267 |
+
}
|
| 268 |
+
resetReq := env.ResetRequest{
|
| 269 |
+
TaskID: taskID,
|
| 270 |
+
NumBuildings: numBuildings,
|
| 271 |
+
}
|
| 272 |
+
if seed > 0 {
|
| 273 |
+
resetReq.Seed = &seed
|
| 274 |
+
}
|
| 275 |
+
resp := s.envMgr.Reset(resetReq)
|
| 276 |
w.Header().Set("Content-Type", "application/json")
|
| 277 |
json.NewEncoder(w).Encode(resp)
|
| 278 |
}
|
openenv.yaml
CHANGED
|
@@ -77,6 +77,22 @@ schemas:
|
|
| 77 |
items:
|
| 78 |
type: string
|
| 79 |
description: "Human-readable list of active fault alarm strings. Empty when no faults. Track 3."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
instruction_card:
|
| 81 |
type: [object, "null"]
|
| 82 |
description: "Natural language objective card. Only populated when task_id=4. Track 2."
|
|
@@ -210,10 +226,14 @@ schemas:
|
|
| 210 |
type: number
|
| 211 |
carbon_reward:
|
| 212 |
type: number
|
| 213 |
-
|
| 214 |
type: number
|
| 215 |
fault_mitigation:
|
| 216 |
type: number
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
total:
|
| 218 |
type: number
|
| 219 |
energy_used_kwh:
|
|
@@ -344,11 +364,11 @@ endpoints:
|
|
| 344 |
health:
|
| 345 |
path: /health
|
| 346 |
method: GET
|
| 347 |
-
description: Health check - returns {"status": "ok", "version": "1.0.0"}
|
| 348 |
ping:
|
| 349 |
path: /ping
|
| 350 |
method: GET
|
| 351 |
-
description: Liveness probe - returns {"status": "ok"}
|
| 352 |
reset:
|
| 353 |
path: /reset
|
| 354 |
method: POST
|
|
|
|
| 77 |
items:
|
| 78 |
type: string
|
| 79 |
description: "Human-readable list of active fault alarm strings. Empty when no faults. Track 3."
|
| 80 |
+
task_card:
|
| 81 |
+
type: string
|
| 82 |
+
nl_summary:
|
| 83 |
+
type: string
|
| 84 |
+
market_type:
|
| 85 |
+
type: string
|
| 86 |
+
season:
|
| 87 |
+
type: string
|
| 88 |
+
price_volatility:
|
| 89 |
+
type: number
|
| 90 |
+
price_forecast:
|
| 91 |
+
type: array
|
| 92 |
+
items:
|
| 93 |
+
type: number
|
| 94 |
+
demand_charge_active:
|
| 95 |
+
type: boolean
|
| 96 |
instruction_card:
|
| 97 |
type: [object, "null"]
|
| 98 |
description: "Natural language objective card. Only populated when task_id=4. Track 2."
|
|
|
|
| 226 |
type: number
|
| 227 |
carbon_reward:
|
| 228 |
type: number
|
| 229 |
+
task_satisfaction:
|
| 230 |
type: number
|
| 231 |
fault_mitigation:
|
| 232 |
type: number
|
| 233 |
+
price_anticipation:
|
| 234 |
+
type: number
|
| 235 |
+
demand_charge_penalty:
|
| 236 |
+
type: number
|
| 237 |
total:
|
| 238 |
type: number
|
| 239 |
energy_used_kwh:
|
|
|
|
| 364 |
health:
|
| 365 |
path: /health
|
| 366 |
method: GET
|
| 367 |
+
description: 'Health check - returns {"status": "ok", "version": "1.0.0"}'
|
| 368 |
ping:
|
| 369 |
path: /ping
|
| 370 |
method: GET
|
| 371 |
+
description: 'Liveness probe - returns {"status": "ok"}'
|
| 372 |
reset:
|
| 373 |
path: /reset
|
| 374 |
method: POST
|