| """ |
| Oracle strategy test — validates physics + grader + strategy gaps. |
| |
| New action space: battery_dispatch, diesel_dispatch, demand_shedding. |
| Grid is the slack variable (absorbs residual up to ±200 kW). |
| """ |
|
|
| import sys |
| sys.path.insert(0, ".") |
|
|
| from gridops.server.environment import GridOpsEnvironment |
| from gridops.models import GridOpsAction |
| from gridops.policies import ( |
| always_diesel_policy, |
| always_discharge_policy, |
| do_nothing_policy, |
| oracle_policy, |
| ) |
| from gridops.tasks.definitions import TASKS |
|
|
|
|
def heuristic_do_nothing(obs: dict) -> GridOpsAction:
    """Do-nothing baseline.

    Delegates to ``do_nothing_policy`` so that the grid, acting as the
    slack source, handles everything.
    """
    action = do_nothing_policy(obs)
    return action
|
|
|
|
def heuristic_always_discharge(obs: dict) -> GridOpsAction:
    """Deliberately bad baseline: always discharge the battery.

    Delegates to ``always_discharge_policy``. The intended failure mode is
    an empty battery by the evening, which leads to a blackout.
    """
    action = always_discharge_policy(obs)
    return action
|
|
|
|
def heuristic_always_diesel(obs: dict) -> GridOpsAction:
    """Deliberately wasteful baseline: always run the diesel generator.

    Delegates to ``always_diesel_policy``. The intended failure mode is
    cost: diesel is billed at Rs 25/kWh.
    """
    action = always_diesel_policy(obs)
    return action
|
|
|
|
def run_episode(env, policy_fn, task_id="task_1_normal", seed=42, max_steps=72):
    """Run one episode of ``task_id`` under ``policy_fn`` and return its grade.

    Args:
        env: Environment object. It must provide
            ``reset(seed=..., task_id=...)`` and ``step(action)``, both
            returning an observation with ``model_dump()`` and ``done``,
            plus a ``state`` attribute that carries ``grade``.
        policy_fn: Callable that maps the observation dict to an action.
            ``oracle_policy`` is special-cased: it is additionally passed
            ``task_id`` as a second positional argument.
        task_id: Identifier of the task to reset the environment to.
        seed: Seed forwarded to ``env.reset``.
        max_steps: Upper bound on the number of steps taken. Defaults to
            72, the horizon that used to be hard-coded here. The loop ends
            earlier as soon as an observation reports ``done``.

    Returns:
        Whatever ``env.state.grade`` holds once the loop has finished.
    """
    obs = env.reset(seed=seed, task_id=task_id)
    obs_dict = obs.model_dump()

    # Loop-invariant: only the oracle takes the task id as an extra argument.
    wants_task_id = policy_fn is oracle_policy

    for _ in range(max_steps):
        if wants_task_id:
            action = policy_fn(obs_dict, task_id)
        else:
            action = policy_fn(obs_dict)
        obs = env.step(action)
        obs_dict = obs.model_dump()
        if obs.done:
            break

    return env.state.grade
|
|
|
|
def main():
    """Compare the oracle with the baseline heuristics and print the results.

    Three sections are printed:

    1. For every task in ``TASKS``, the score, reliability, actual cost and
       baseline cost of each policy.
    2. A determinism check: the oracle is run three times on
       ``task_1_normal`` with seed 42 and the scores are compared after
       formatting to six decimals.
    3. Every key/value pair of the oracle's grade on ``task_1_normal``.

    An episode that yields no grade is reported as ``NO GRADE`` in all
    three sections; the later sections no longer subscript or iterate a
    missing grade.
    """
    env = GridOpsEnvironment()
    policies = {
        "Oracle": oracle_policy,
        "Do-Nothing": heuristic_do_nothing,
        "Always-Discharge": heuristic_always_discharge,
        "Always-Diesel": heuristic_always_diesel,
    }

    print("=" * 70)
    print(" GridOps Oracle Test v2 — New Action Space (Battery/Diesel/Shed)")
    print(" Grid is slack. VoLL = Rs 150/kWh. Degradation = Rs 2.5/kWh.")
    print("=" * 70)

    # Section 1: every policy on every task.
    for task_id in TASKS:
        print(f"\n--- {task_id} ---")
        for name, fn in policies.items():
            grade = run_episode(env, fn, task_id)
            if grade:
                print(f" {name:22s} score={grade['score']:.4f} "
                      f"reliability={grade['reliability']:.4f} "
                      f"cost=Rs {grade['actual_cost']:.0f} "
                      f"baseline=Rs {grade['baseline_cost']:.0f}")
            else:
                print(f" {name:22s} NO GRADE")

    # Section 2: same seed, same task, same policy -> scores should match.
    print("\n--- Determinism Check (3 runs of Oracle on Task 1) ---")
    score_keys = []
    for run_no in range(1, 4):
        grade = run_episode(env, oracle_policy, "task_1_normal", seed=42)
        if not grade:
            # Keep a placeholder so a missing grade is not mistaken for a match.
            score_keys.append(None)
            print(f" Run {run_no}: NO GRADE")
            continue
        score_keys.append(f"{grade['score']:.6f}")
        print(f" Run {run_no}: score={grade['score']:.4f}")

    if None in score_keys:
        print(" INCONCLUSIVE: at least one run produced no grade")
    elif len(set(score_keys)) == 1:
        print(" Deterministic: identical scores across runs")
    else:
        print(" NON-DETERMINISTIC: scores differ!")

    # Section 3: dump the full grade for the oracle on task 1.
    print("\n--- Oracle Detailed Breakdown (Task 1) ---")
    grade = run_episode(env, oracle_policy, "task_1_normal", seed=42)
    if grade:
        for k, v in grade.items():
            print(f" {k}: {v}")
    else:
        print(" NO GRADE")

    print("\n" + "=" * 70)
|
|
|
|
# Script entry point: run the comparison only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|
|