File size: 6,336 Bytes
bc37871
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fcb451b
bc37871
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
"""
Oracle strategy test — validates physics + grader + strategy gaps.

New action space: battery_dispatch, diesel_dispatch, demand_shedding.
Grid is the slack variable (absorbs residual up to ±200 kW).
"""

import sys
sys.path.insert(0, ".")

import numpy as np
from gridops.server.environment import GridOpsEnvironment
from gridops.models import GridOpsAction
from gridops.tasks.definitions import TASKS


def oracle_policy(obs: dict) -> GridOpsAction:
    """
    Smart oracle: manages battery for arbitrage + evening peak coverage.

    Strategy (hour_of_day is wall-clock time, derived from the step counter):
      - Night (0-5h, cheap grid): charge battery
      - Solar hours (6-14h): charge battery from solar surplus, or from the
        grid when it is cheap and the battery is low
      - Pre-peak (15-17h): top up battery
      - Evening peak (18-22h): when net demand exceeds the grid cap, cover the
        gap with battery first, then diesel, then demand shedding; otherwise
        discharge the battery only when grid power is expensive
      - Hour 23: recharge if the battery is depleted

    Args:
        obs: Observation dict. Reads the keys "hour", "battery_soc",
            "grid_price", "demand_kw", "solar_kw" and "diesel_fuel_remaining".

    Returns:
        GridOpsAction with battery_dispatch clipped to [-1, 1] (negative
        charges, positive discharges) and diesel_dispatch / demand_shedding
        clipped to [0, 1].
    """
    hour_of_day = (int(obs["hour"]) + 6) % 24  # episode starts at 6 AM
    soc = obs["battery_soc"]
    price = obs["grid_price"]
    demand = obs["demand_kw"]
    solar = obs["solar_kw"]
    fuel = obs["diesel_fuel_remaining"]

    # All controls default to "off"; each branch below only overrides what it needs.
    battery = 0.0   # -1=charge, +1=discharge
    diesel = 0.0
    shedding = 0.0

    # Net demand after solar
    net = demand - solar

    if hour_of_day < 6:
        # Night: cheap power, charge battery aggressively
        if soc < 0.9:
            battery = -0.8

    elif hour_of_day < 15:
        # Solar hours
        if solar > demand:
            # Surplus — charge battery (grid absorbs the rest as export).
            # NOTE(review): the 100.0 divisor presumably is the battery's
            # rated power in kW — confirm against the environment.
            if soc < 0.95:
                battery = -min(1.0, (solar - demand) / 100.0)
        elif soc < 0.7 and price < 6:
            # Deficit — grid covers it. Charge battery if cheap.
            battery = -0.5

    elif hour_of_day < 18:
        # Pre-peak: ensure battery is charged for evening
        if soc < 0.8:
            battery = -0.8

    elif hour_of_day < 23:
        # Evening peak
        if net > GRID_MAX_KW:
            # Demand exceeds what the grid can import: fill the gap in merit
            # order. The battery is skipped when nearly empty, but diesel and
            # shedding must still be considered in that case — otherwise a
            # depleted battery would leave the gap completely uncovered.
            gap = net - GRID_MAX_KW
            if soc > 0.1:
                battery = min(1.0, gap / 100.0)

            # Whatever the battery cannot cover goes to diesel (if fuel is left)
            remaining = gap - battery * 100
            if remaining > 0 and fuel > 0.05:
                diesel = min(1.0, remaining / 100.0)

            # If still short, shed demand as a last resort
            remaining2 = remaining - diesel * 100
            if remaining2 > 0:
                # +1 in the denominator avoids division by zero at zero demand
                shedding = min(1.0, remaining2 / (demand * 0.20 + 1))
        elif price > 10 and soc > 0.5:
            # Expensive grid: discharge battery to save money
            battery = min(0.6, (price - 8) / 10.0)

    else:
        # Hour 23: low demand, recharge if depleted
        if soc < 0.4:
            battery = -0.5

    return GridOpsAction(
        battery_dispatch=float(np.clip(battery, -1, 1)),
        diesel_dispatch=float(np.clip(diesel, 0, 1)),
        demand_shedding=float(np.clip(shedding, 0, 1)),
    )


# Grid import ceiling in kW that oracle_policy compares net demand against
# during the evening peak. The module docstring states the grid absorbs the
# residual up to ±200 kW.
# NOTE(review): presumably this must mirror the environment's own grid limit —
# confirm against gridops.server.environment.
GRID_MAX_KW = 200.0  # for oracle calculations


def heuristic_do_nothing(obs: dict) -> GridOpsAction:
    """Baseline policy: every control stays at zero.

    The observation is ignored. With no battery, diesel or shedding action,
    the grid is left to handle everything as slack.
    """
    idle_controls = {
        "battery_dispatch": 0.0,
        "diesel_dispatch": 0.0,
        "demand_shedding": 0.0,
    }
    return GridOpsAction(**idle_controls)


def heuristic_always_discharge(obs: dict) -> GridOpsAction:
    """Deliberately bad policy: full battery discharge on every step.

    The observation is ignored. Intended as a negative baseline — the battery
    is expected to be empty by the evening peak, leading to blackout.
    """
    controls = {
        "battery_dispatch": 1.0,
        "diesel_dispatch": 0.0,
        "demand_shedding": 0.0,
    }
    return GridOpsAction(**controls)


def heuristic_always_diesel(obs: dict) -> GridOpsAction:
    """Deliberately wasteful policy: diesel at full output on every step.

    The observation is ignored. Intended as a negative baseline — running
    diesel continuously hemorrhages money (Rs 25/kWh per the original note).
    """
    controls = {
        "battery_dispatch": 0.0,
        "diesel_dispatch": 1.0,
        "demand_shedding": 0.0,
    }
    return GridOpsAction(**controls)


def run_episode(env, policy_fn, task_id="task_1_normal", seed=42, max_steps=72):
    """Run one episode with ``policy_fn`` and return the environment's grade.

    Args:
        env: Environment exposing ``reset(seed=..., task_id=...)``,
            ``step(action)`` and a ``state`` attribute with a ``grade`` field.
            Observations returned by ``reset``/``step`` must provide
            ``model_dump()`` and a ``done`` flag.
        policy_fn: Callable mapping the observation dict to an action.
        task_id: Task identifier forwarded to ``env.reset``.
        seed: Seed forwarded to ``env.reset``.
        max_steps: Upper bound on the number of ``env.step`` calls. Defaults
            to 72, the full episode length this script was written for.

    Returns:
        Whatever ``env.state.grade`` holds once the episode stops (either
        because an observation reported ``done`` or ``max_steps`` was reached).
    """
    obs = env.reset(seed=seed, task_id=task_id)
    obs_dict = obs.model_dump()

    for _ in range(max_steps):
        obs = env.step(policy_fn(obs_dict))
        obs_dict = obs.model_dump()
        # Stop early if the environment ends the episode before the step cap.
        if obs.done:
            break

    return env.state.grade


def main():
    """Run every policy on every task, then sanity-check the oracle.

    Prints three sections to stdout:
      1. A score/reliability/cost line per (task, policy) pair.
      2. A determinism check: three oracle runs on task 1 with the same seed.
      3. The full grade of one oracle run on task 1, key by key.

    A missing (falsy) grade is reported as "NO GRADE" in every section instead
    of raising.
    """
    env = GridOpsEnvironment()
    policies = {
        "Oracle": oracle_policy,
        "Do-Nothing": heuristic_do_nothing,
        "Always-Discharge": heuristic_always_discharge,
        "Always-Diesel": heuristic_always_diesel,
    }

    print("=" * 70)
    print("  GridOps Oracle Test v2 — New Action Space (Battery/Diesel/Shed)")
    print("  Grid is slack. VoLL = Rs 150/kWh. Degradation = Rs 2.5/kWh.")
    print("=" * 70)

    for task_id in TASKS:
        print(f"\n--- {task_id} ---")
        for name, fn in policies.items():
            grade = run_episode(env, fn, task_id)
            if grade:
                print(f"  {name:22s}  score={grade['score']:.4f}  "
                      f"reliability={grade['reliability']:.4f}  "
                      f"cost=Rs {grade['actual_cost']:.0f}  "
                      f"baseline=Rs {grade['baseline_cost']:.0f}")
            else:
                print(f"  {name:22s}  NO GRADE")

    # Determinism check
    print("\n--- Determinism Check (3 runs of Oracle on Task 1) ---")
    runs = 3
    scores = []
    for i in range(runs):
        grade = run_episode(env, oracle_policy, "task_1_normal", seed=42)
        if not grade:
            # Same guard as the per-task loop: a missing grade must not crash.
            print(f"  Run {i+1}: NO GRADE")
            continue
        scores.append(grade["score"])
        print(f"  Run {i+1}: score={grade['score']:.4f}")

    if len(scores) < runs:
        print("  INCONCLUSIVE: at least one run produced no grade")
    elif len({f"{s:.6f}" for s in scores}) == 1:
        # Scores are compared at 6 decimal places to ignore float noise.
        print("  Deterministic: identical scores across runs")
    else:
        print("  NON-DETERMINISTIC: scores differ!")

    # Detailed oracle breakdown
    print("\n--- Oracle Detailed Breakdown (Task 1) ---")
    grade = run_episode(env, oracle_policy, "task_1_normal", seed=42)
    if grade:
        for k, v in grade.items():
            print(f"  {k}: {v}")
    else:
        print("  NO GRADE")

    print("\n" + "=" * 70)


if __name__ == "__main__":
    main()