File size: 3,235 Bytes
bc37871
 
 
 
 
 
 
 
 
 
 
 
e7ae456
 
 
 
 
 
bc37871
 
 
 
 
e7ae456
bc37871
 
 
 
e7ae456
bc37871
 
 
 
e7ae456
bc37871
 
 
 
 
 
 
 
e7ae456
bc37871
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""
Oracle strategy test — validates physics + grader + strategy gaps.

New action space: battery_dispatch, diesel_dispatch, demand_shedding.
Grid is the slack variable (absorbs residual up to ±200 kW).
"""

import sys
sys.path.insert(0, ".")

from gridops.server.environment import GridOpsEnvironment
from gridops.models import GridOpsAction
from gridops.policies import (
    always_diesel_policy,
    always_discharge_policy,
    do_nothing_policy,
    oracle_policy,
)
from gridops.tasks.definitions import TASKS


def heuristic_do_nothing(obs: dict) -> GridOpsAction:
    """Baseline heuristic that issues no dispatch of its own.

    Forwards the observation to ``do_nothing_policy`` and returns its action
    unchanged; the grid is left to handle everything as slack.
    """
    action = do_nothing_policy(obs)
    return action


def heuristic_always_discharge(obs: dict) -> GridOpsAction:
    """Deliberately bad heuristic: discharge the battery on every step.

    Forwards the observation to ``always_discharge_policy``. Intended as a
    negative baseline: the battery ends up empty for the evening, leading to
    a blackout.
    """
    action = always_discharge_policy(obs)
    return action


def heuristic_always_diesel(obs: dict) -> GridOpsAction:
    """Deliberately wasteful heuristic: run the diesel generator on every step.

    Forwards the observation to ``always_diesel_policy``. Intended as a
    costly baseline, since diesel hemorrhages money at Rs 25/kWh.
    """
    action = always_diesel_policy(obs)
    return action


def run_episode(env, policy_fn, task_id="task_1_normal", seed=42, max_steps=72):
    """Run one episode of ``task_id`` under ``policy_fn`` and return its grade.

    Args:
        env: Environment object providing ``reset(seed=..., task_id=...)``,
            ``step(action)`` and a ``state`` attribute with a ``grade``.
        policy_fn: Callable taking the observation dict and returning an
            action. ``oracle_policy`` is special-cased: it is also passed
            ``task_id`` as a second positional argument.
        task_id: Task identifier forwarded to ``env.reset``.
        seed: Seed forwarded to ``env.reset``.
        max_steps: Maximum number of steps to take before stopping. Defaults
            to 72, the episode length this script previously hard-coded.

    Returns:
        ``env.state.grade`` as found once the loop ends (either ``obs.done``
        was set or ``max_steps`` was reached). The value may be falsy when no
        grade is available, so callers should check before indexing into it.
    """
    # Only the oracle takes the task id; resolve the calling convention once
    # rather than re-testing it on every step.
    if policy_fn is oracle_policy:
        def choose_action(observation):
            return policy_fn(observation, task_id)
    else:
        choose_action = policy_fn

    obs_dict = env.reset(seed=seed, task_id=task_id).model_dump()

    for _ in range(max_steps):
        obs = env.step(choose_action(obs_dict))
        obs_dict = obs.model_dump()
        if obs.done:
            break

    return env.state.grade


def main():
    """Compare the oracle against baseline policies and print the results.

    Prints three sections to stdout:

    1. For every task in ``TASKS``, the score, reliability, actual cost and
       baseline cost of each policy (or ``NO GRADE`` when the episode yields
       no grade).
    2. A determinism check: three oracle runs on ``task_1_normal`` with the
       same seed, compared at six decimal places.
    3. Every key/value pair of the oracle's grade on ``task_1_normal``.

    A missing grade is reported as ``NO GRADE`` in all three sections instead
    of raising while indexing into it.
    """
    env = GridOpsEnvironment()
    policies = {
        "Oracle": oracle_policy,
        "Do-Nothing": heuristic_do_nothing,
        "Always-Discharge": heuristic_always_discharge,
        "Always-Diesel": heuristic_always_diesel,
    }

    print("=" * 70)
    print("  GridOps Oracle Test v2 — New Action Space (Battery/Diesel/Shed)")
    print("  Grid is slack. VoLL = Rs 150/kWh. Degradation = Rs 2.5/kWh.")
    print("=" * 70)

    for task_id in TASKS:
        print(f"\n--- {task_id} ---")
        for name, fn in policies.items():
            grade = run_episode(env, fn, task_id)
            if grade:
                print(f"  {name:22s}  score={grade['score']:.4f}  "
                      f"reliability={grade['reliability']:.4f}  "
                      f"cost=Rs {grade['actual_cost']:.0f}  "
                      f"baseline=Rs {grade['baseline_cost']:.0f}")
            else:
                print(f"  {name:22s}  NO GRADE")

    # Determinism check
    print("\n--- Determinism Check (3 runs of Oracle on Task 1) ---")
    scores = []
    for i in range(3):
        grade = run_episode(env, oracle_policy, "task_1_normal", seed=42)
        if not grade:
            # Same guard as the table above: record the gap instead of
            # failing on grade["score"].
            scores.append(None)
            print(f"  Run {i+1}: NO GRADE")
            continue
        scores.append(grade["score"])
        print(f"  Run {i+1}: score={grade['score']:.4f}")

    if None in scores:
        print("  NO GRADE: determinism could not be checked")
    elif len({f"{s:.6f}" for s in scores}) == 1:
        # Compare at fixed precision so float noise below 1e-6 is ignored.
        print("  Deterministic: identical scores across runs")
    else:
        print("  NON-DETERMINISTIC: scores differ!")

    # Detailed oracle breakdown
    print("\n--- Oracle Detailed Breakdown (Task 1) ---")
    grade = run_episode(env, oracle_policy, "task_1_normal", seed=42)
    if grade:
        for k, v in grade.items():
            print(f"  {k}: {v}")
    else:
        print("  NO GRADE")

    print("\n" + "=" * 70)

# Run the policy comparison only when this file is executed as a script,
# so importing the module does not trigger any episodes.
if __name__ == "__main__":
    main()