# File size: 3,235 Bytes
"""
Oracle strategy test — validates physics + grader + strategy gaps.
New action space: battery_dispatch, diesel_dispatch, demand_shedding.
Grid is the slack variable (absorbs residual up to ±200 kW).
"""
import sys
sys.path.insert(0, ".")
from gridops.server.environment import GridOpsEnvironment
from gridops.models import GridOpsAction
from gridops.policies import (
always_diesel_policy,
always_discharge_policy,
do_nothing_policy,
oracle_policy,
)
from gridops.tasks.definitions import TASKS
def heuristic_do_nothing(obs: dict) -> GridOpsAction:
    """Passive baseline policy.

    Forwards the observation to ``do_nothing_policy`` and returns the
    action it produces. With no dispatch, the grid acts as the slack
    variable and handles everything.
    """
    action = do_nothing_policy(obs)
    return action
def heuristic_always_discharge(obs: dict) -> GridOpsAction:
    """Deliberately bad baseline: discharge the battery at every step.

    Forwards the observation to ``always_discharge_policy``. The intended
    failure mode is that the battery is empty by the evening, which leads
    to a blackout.
    """
    action = always_discharge_policy(obs)
    return action
def heuristic_always_diesel(obs: dict) -> GridOpsAction:
    """Deliberately wasteful baseline: run the diesel generator at every step.

    Forwards the observation to ``always_diesel_policy``. The intended
    failure mode is cost: diesel is priced at Rs 25/kWh.
    """
    action = always_diesel_policy(obs)
    return action
def run_episode(env, policy_fn, task_id="task_1_normal", seed=42, max_steps=72):
    """Run one episode with ``policy_fn`` and return the environment's grade.

    Args:
        env: Environment object. It must provide ``reset(seed=..., task_id=...)``
            and ``step(action)``, both returning an observation that has
            ``model_dump()`` and ``done``, plus a ``state`` attribute that
            carries ``grade``.
        policy_fn: Callable that maps the observation dict to an action.
            ``oracle_policy`` is the one exception: it is also given
            ``task_id`` as a second positional argument.
        task_id: Task identifier forwarded to ``env.reset``.
        seed: Seed forwarded to ``env.reset``.
        max_steps: Upper bound on the number of ``env.step`` calls. Defaults
            to 72, the horizon that used to be hard-coded here. The loop
            also stops early as soon as an observation reports ``done``.

    Returns:
        Whatever ``env.state.grade`` holds once the loop has finished.
        Callers should be prepared for a falsy value.
    """
    obs = env.reset(seed=seed, task_id=task_id)
    obs_dict = obs.model_dump()

    for _ in range(max_steps):
        # The oracle needs to know which task it is solving; every other
        # policy only looks at the current observation.
        if policy_fn is oracle_policy:
            action = policy_fn(obs_dict, task_id)
        else:
            action = policy_fn(obs_dict)

        obs = env.step(action)
        obs_dict = obs.model_dump()
        if obs.done:
            break

    return env.state.grade
def main():
    """Compare the oracle against three baseline policies and print the results.

    For every task id in ``TASKS`` each policy is run through ``run_episode``
    on one shared ``GridOpsEnvironment`` and its grade is printed. Afterwards
    the oracle is re-run three times on ``task_1_normal`` with seed 42 to
    check that the score is reproducible, and once more to print every field
    of its grade.

    ``run_episode`` may return a falsy grade. Every section reports that
    case explicitly instead of indexing into the missing grade.
    """
    env = GridOpsEnvironment()
    policies = {
        "Oracle": oracle_policy,
        "Do-Nothing": heuristic_do_nothing,
        "Always-Discharge": heuristic_always_discharge,
        "Always-Diesel": heuristic_always_diesel,
    }

    print("=" * 70)
    print(" GridOps Oracle Test v2 — New Action Space (Battery/Diesel/Shed)")
    print(" Grid is slack. VoLL = Rs 150/kWh. Degradation = Rs 2.5/kWh.")
    print("=" * 70)

    for task_id in TASKS:
        print(f"\n--- {task_id} ---")
        for name, fn in policies.items():
            grade = run_episode(env, fn, task_id)
            if grade:
                print(f" {name:22s} score={grade['score']:.4f} "
                      f"reliability={grade['reliability']:.4f} "
                      f"cost=Rs {grade['actual_cost']:.0f} "
                      f"baseline=Rs {grade['baseline_cost']:.0f}")
            else:
                print(f" {name:22s} NO GRADE")

    # Determinism check
    print("\n--- Determinism Check (3 runs of Oracle on Task 1) ---")
    runs = 3
    scores = []
    for i in range(runs):
        grade = run_episode(env, oracle_policy, "task_1_normal", seed=42)
        if not grade:
            # Same guard as the per-task loop: a missing grade must not crash.
            print(f" Run {i+1}: NO GRADE")
            continue
        scores.append(grade["score"])
        print(f" Run {i+1}: score={grade['score']:.4f}")

    if len(scores) != runs:
        # An empty or partial score list cannot prove anything either way.
        print(" INCONCLUSIVE: not every run produced a grade")
    elif len({f"{s:.6f}" for s in scores}) == 1:
        # Scores are compared after rounding to 6 decimals.
        print(" Deterministic: identical scores across runs")
    else:
        print(" NON-DETERMINISTIC: scores differ!")

    # Detailed oracle breakdown
    print("\n--- Oracle Detailed Breakdown (Task 1) ---")
    grade = run_episode(env, oracle_policy, "task_1_normal", seed=42)
    if grade:
        for k, v in grade.items():
            print(f" {k}: {v}")
    else:
        print(" NO GRADE")

    print("\n" + "=" * 70)
# Run the policy comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|