"""
Oracle strategy test — validates physics + grader + strategy gaps.

New action space: battery_dispatch, diesel_dispatch, demand_shedding.
Grid is the slack variable (absorbs residual up to ±200 kW).
"""
import sys
sys.path.insert(0, ".")
import numpy as np
from gridops.server.environment import GridOpsEnvironment
from gridops.models import GridOpsAction
from gridops.tasks.definitions import TASKS
def oracle_policy(obs: dict) -> GridOpsAction:
    """
    Smart oracle: manages battery for arbitrage + evening peak coverage.

    Strategy by hour of day:
    - Night (0-5h): charge battery while grid power is cheap
    - Solar hours (6-14h): charge from solar surplus; on a deficit, charge
      only when the battery is low and the grid price is low
    - Pre-peak (15-17h): top up battery
    - Evening peak (18-22h): cover demand beyond the grid cap with battery,
      then diesel, then demand shedding; otherwise discharge against
      expensive grid prices
    - Hour 23: recharge if depleted

    Args:
        obs: Observation dict. Reads "hour", "battery_soc", "grid_price",
            "demand_kw", "solar_kw" and "diesel_fuel_remaining".

    Returns:
        GridOpsAction with battery_dispatch clipped to [-1, 1] (negative
        charges, positive discharges) and diesel_dispatch / demand_shedding
        clipped to [0, 1].
    """
    hour_of_day = (int(obs["hour"]) + 6) % 24  # episode starts at 6 AM
    soc = obs["battery_soc"]
    price = obs["grid_price"]
    demand = obs["demand_kw"]
    solar = obs["solar_kw"]
    fuel = obs["diesel_fuel_remaining"]

    battery = 0.0  # -1=charge, +1=discharge
    diesel = 0.0
    shedding = 0.0

    # Net demand after solar
    net = demand - solar

    if hour_of_day < 6:
        # Night: cheap power, charge battery aggressively
        if soc < 0.9:
            battery = -0.8
    elif hour_of_day < 15:
        if solar > demand:
            # Surplus: charge from it; once full, the grid absorbs the export.
            if soc < 0.95:
                battery = -min(1.0, (solar - demand) / 100.0)
        elif soc < 0.7 and price < 6:
            # Deficit is covered by the grid; charge only while it is cheap.
            battery = -0.5
    elif hour_of_day < 18:
        # Pre-peak: ensure battery is charged for evening
        if soc < 0.8:
            battery = -0.8
    elif hour_of_day < 23:
        if net > GRID_MAX_KW:
            # Demand exceeds what the grid can supply: cover the gap with
            # battery, then diesel, then shedding. A depleted battery
            # (soc <= 0.1) contributes nothing, but the diesel and shedding
            # fallbacks must still run, otherwise the gap goes unserved.
            gap = net - GRID_MAX_KW
            if soc > 0.1:
                # NOTE(review): the /100 scaling presumably reflects a
                # 100 kW battery rating; confirm against the environment.
                battery = min(1.0, gap / 100.0)
            remaining = gap - battery * 100
            if remaining > 0 and fuel > 0.05:
                diesel = min(1.0, remaining / 100.0)
            remaining -= diesel * 100
            if remaining > 0:
                # Normalised by 20% of demand; the +1 keeps the divisor
                # non-zero when demand is 0.
                shedding = min(1.0, remaining / (demand * 0.20 + 1))
        elif price > 10 and soc > 0.5:
            # Expensive grid: discharge battery to save money
            battery = min(0.6, (price - 8) / 10.0)
    else:
        # Hour 23: low demand, recharge if depleted
        if soc < 0.4:
            battery = -0.5

    return GridOpsAction(
        battery_dispatch=float(np.clip(battery, -1, 1)),
        diesel_dispatch=float(np.clip(diesel, 0, 1)),
        demand_shedding=float(np.clip(shedding, 0, 1)),
    )
# Grid capacity in kW that oracle_policy compares net demand against during
# the evening peak; same magnitude as the ±200 kW slack limit in the module
# docstring.
GRID_MAX_KW = 200.0  # for oracle calculations
def heuristic_do_nothing(obs: dict) -> GridOpsAction:
    """Idle baseline: every control stays at zero; the grid acts as slack."""
    idle_controls = {
        "battery_dispatch": 0.0,
        "diesel_dispatch": 0.0,
        "demand_shedding": 0.0,
    }
    return GridOpsAction(**idle_controls)
def heuristic_always_discharge(obs: dict) -> GridOpsAction:
    """Deliberately bad baseline: discharge the battery at full rate each step.

    Intended to leave the battery empty before the evening peak.
    """
    return GridOpsAction(
        battery_dispatch=1.0,  # +1 = discharge
        diesel_dispatch=0.0,
        demand_shedding=0.0,
    )
def heuristic_always_diesel(obs: dict) -> GridOpsAction:
    """Wasteful baseline: run diesel at full output on every step.

    Intended to show the cost of over-using the generator (Rs 25/kWh).
    """
    return GridOpsAction(
        battery_dispatch=0.0,
        diesel_dispatch=1.0,  # generator permanently at maximum
        demand_shedding=0.0,
    )
def run_episode(env, policy_fn, task_id="task_1_normal", seed=42, max_steps=72):
    """Run one episode of *policy_fn* on *env* and return its grade.

    Args:
        env: Environment exposing ``reset(seed=..., task_id=...)``,
            ``step(action)`` and a ``state`` attribute carrying ``grade``.
        policy_fn: Callable mapping the observation dict (the result of
            ``obs.model_dump()``) to an action accepted by ``env.step``.
        task_id: Task identifier forwarded to ``env.reset``.
        seed: Seed forwarded to ``env.reset``.
        max_steps: Upper bound on the number of steps. The default of 72
            keeps the previous fixed episode length.

    Returns:
        ``env.state.grade`` as found once the loop stops, either because an
        observation reported ``done`` or because ``max_steps`` was reached.
    """
    obs = env.reset(seed=seed, task_id=task_id)
    for _ in range(max_steps):
        obs = env.step(policy_fn(obs.model_dump()))
        if obs.done:
            break
    return env.state.grade
def main():
    """Compare all policies on every task, then sanity-check the oracle.

    Prints one score row per policy for each task in TASKS, re-runs the
    oracle three times on "task_1_normal" with a fixed seed to check that the
    score is reproducible, and finally prints every field of the oracle's
    grade for that task. A missing grade is reported as "NO GRADE" in every
    section instead of raising.
    """
    env = GridOpsEnvironment()
    policies = {
        "Oracle": oracle_policy,
        "Do-Nothing": heuristic_do_nothing,
        "Always-Discharge": heuristic_always_discharge,
        "Always-Diesel": heuristic_always_diesel,
    }

    print("=" * 70)
    print(" GridOps Oracle Test v2 — New Action Space (Battery/Diesel/Shed)")
    print(" Grid is slack. VoLL = Rs 150/kWh. Degradation = Rs 2.5/kWh.")
    print("=" * 70)

    for task_id in TASKS:
        print(f"\n--- {task_id} ---")
        for name, fn in policies.items():
            grade = run_episode(env, fn, task_id)
            if grade:
                print(f" {name:22s} score={grade['score']:.4f} "
                      f"reliability={grade['reliability']:.4f} "
                      f"cost=Rs {grade['actual_cost']:.0f} "
                      f"baseline=Rs {grade['baseline_cost']:.0f}")
            else:
                print(f" {name:22s} NO GRADE")

    # Determinism check
    print("\n--- Determinism Check (3 runs of Oracle on Task 1) ---")
    runs = 3
    scores = []
    for i in range(runs):
        grade = run_episode(env, oracle_policy, "task_1_normal", seed=42)
        if not grade:
            # Same fallback as the per-task table: report, don't crash.
            print(f" Run {i+1}: NO GRADE")
            continue
        scores.append(grade["score"])
        print(f" Run {i+1}: score={grade['score']:.4f}")
    if len(scores) < runs:
        print(" Determinism check inconclusive: missing grade")
    elif len(set(f"{s:.6f}" for s in scores)) == 1:
        # Scores are compared after rounding to 6 decimals.
        print(" Deterministic: identical scores across runs")
    else:
        print(" NON-DETERMINISTIC: scores differ!")

    # Detailed oracle breakdown
    print("\n--- Oracle Detailed Breakdown (Task 1) ---")
    grade = run_episode(env, oracle_policy, "task_1_normal", seed=42)
    if grade:
        for k, v in grade.items():
            print(f" {k}: {v}")
    else:
        print(" NO GRADE")
    print("\n" + "=" * 70)
# Run the policy comparison only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
|