# EVChargeEnv / env / ev_charge_env.py
# (uploaded via huggingface_hub by user "oozan"; revision a283aa6, verified)
import gymnasium as gym
from gymnasium import spaces
import numpy as np
class EVChargeEnv(gym.Env):
    """
    EV charging environment.

    Goal:
        - Reach full battery (charge = 1.0)
        - Minimize cost
        - Avoid stressing the grid

    Observation:
        ``[charge_level, price, grid_load, time_step_norm]``, each in [0, 1].

    Action:
        continuous charging rate in [0.0, 1.0] (Box of shape (1,); a bare
        scalar is also accepted and clamped).

    Parameters
    ----------
    max_steps : int
        Episode length before truncation.
    scenario : str
        Difficulty preset: one of ``"easy"``, ``"medium"``, ``"hard"``.

    Raises
    ------
    ValueError
        If ``scenario`` is not a recognized preset.
    """

    metadata = {"render_modes": ["human"]}

    def __init__(self, max_steps: int = 48, scenario: str = "medium"):
        super().__init__()
        # Validate with an exception, not `assert` — asserts are stripped
        # under `python -O`, which would silently accept bad scenarios.
        if scenario not in ("easy", "medium", "hard"):
            raise ValueError(
                f"scenario must be 'easy', 'medium' or 'hard', got {scenario!r}"
            )
        self.scenario = scenario

        # Observation: charge, price, load, normalized time — all in [0, 1].
        self.observation_space = spaces.Box(
            low=np.array([0.0, 0.0, 0.0, 0.0], dtype=np.float32),
            high=np.array([1.0, 1.0, 1.0, 1.0], dtype=np.float32),
            dtype=np.float32,
        )
        # Action: charge rate between 0 and 1.
        self.action_space = spaces.Box(
            low=np.array([0.0], dtype=np.float32),
            high=np.array([1.0], dtype=np.float32),
            dtype=np.float32,
        )

        self.max_steps = max_steps
        self.step_count = 0

        # Internal state (re-randomized in reset()).
        self.charge = 0.0
        self.price = 0.0
        self.grid_load = 0.0

        # Scenario parameters (overwritten in reset() via _set_scenario_params).
        self.base_price = 0.3
        self.base_load = 0.5
        self.load_threshold = 0.8   # above this → overload penalty
        self.charge_rate_scale = 0.08  # how fast the battery fills per unit action

    def _set_scenario_params(self):
        """Set dynamics parameters based on the difficulty scenario."""
        if self.scenario == "easy":
            self.base_price = 0.25
            self.base_load = 0.4
            self.load_threshold = 0.9
            self.charge_rate_scale = 0.10
        elif self.scenario == "medium":
            self.base_price = 0.30
            self.base_load = 0.5
            self.load_threshold = 0.85
            self.charge_rate_scale = 0.08
        else:  # hard
            self.base_price = 0.35
            self.base_load = 0.6
            self.load_threshold = 0.8
            self.charge_rate_scale = 0.06

    def reset(self, seed=None, options=None):
        """Reset the environment and return ``(obs, info)``.

        Uses gymnasium's per-environment RNG (``self.np_random``, seeded by
        ``super().reset(seed=seed)``) rather than the global ``np.random``
        state, so independent env instances are reproducible and do not
        interfere with each other or with user code.
        """
        super().reset(seed=seed)  # seeds self.np_random when seed is given
        self._set_scenario_params()
        self.step_count = 0

        # Random initial charge, slightly low so there is work to do.
        self.charge = float(self.np_random.uniform(0.1, 0.4))
        # Start price/load around base with small noise.
        self.price = float(
            np.clip(self.base_price + self.np_random.normal(0, 0.05), 0.0, 1.0)
        )
        self.grid_load = float(
            np.clip(self.base_load + self.np_random.normal(0, 0.05), 0.0, 1.0)
        )
        return self._get_obs(), {}

    def _get_obs(self):
        """Return the observation vector as a float32 array of shape (4,)."""
        time_step_norm = self.step_count / max(1, self.max_steps - 1)
        return np.array(
            [self.charge, self.price, self.grid_load, time_step_norm],
            dtype=np.float32,
        )

    def step(self, action):
        """Advance one time step.

        Parameters
        ----------
        action : array-like or float
            Charging rate; clamped into [0, 1]. Accepts the Box's (1,)
            array, a 0-d array, or a bare Python float.

        Returns
        -------
        tuple
            ``(obs, reward, terminated, truncated, info)`` where ``info``
            breaks the reward into its components.
        """
        self.step_count += 1

        # Robustly extract a scalar rate: flatten handles (1,) arrays,
        # 0-d arrays, and plain floats alike; then clamp to valid range.
        a = float(np.clip(np.asarray(action, dtype=np.float64).reshape(-1)[0], 0.0, 1.0))

        # --- Dynamics ---
        # Battery charging (linear in action, scaled by scenario).
        self.charge = float(np.clip(self.charge + a * self.charge_rate_scale, 0.0, 1.0))

        # Price & load evolve as mean-reverting noisy processes around
        # their scenario base values (AR(1)-style smoothing).
        self.price = float(
            np.clip(
                self.price * 0.7
                + self.base_price * 0.3
                + self.np_random.normal(0, 0.05),
                0.0,
                1.0,
            )
        )
        self.grid_load = float(
            np.clip(
                self.grid_load * 0.6
                + self.base_load * 0.4
                + self.np_random.normal(0, 0.07),
                0.0,
                1.0,
            )
        )

        # --- Reward ---
        # Progress reward: proportional to energy actually added this step.
        progress = a * self.charge_rate_scale
        progress_reward = progress * 5.0  # scaled up

        # Cost penalty: charging more when the price is high is worse.
        cost_penalty = self.price * a * 4.0

        # Grid overload penalty: charging adds to effective load; only the
        # excess over the scenario threshold is penalized.
        effective_load = self.grid_load + a * 0.2
        overload = max(0.0, effective_load - self.load_threshold)
        overload_penalty = overload * 6.0

        # Small per-step penalty to encourage finishing quickly.
        time_penalty = 0.01

        reward = progress_reward - cost_penalty - overload_penalty - time_penalty

        # Episode ends when (nearly) full; truncates at the step limit.
        terminated = self.charge >= 0.999
        truncated = self.step_count >= self.max_steps

        info = {
            "progress_reward": progress_reward,
            "cost_penalty": cost_penalty,
            "overload_penalty": overload_penalty,
        }
        return self._get_obs(), reward, terminated, truncated, info

    def render(self):
        """Print a one-line human-readable summary of the current state."""
        print(
            f"step={self.step_count} charge={self.charge:.3f} "
            f"price={self.price:.3f} load={self.grid_load:.3f}"
        )