# EVChargeEnv / env / ev_charge_env.py
# (uploaded via huggingface_hub by user "oozan"; revision a283aa6, verified)
import gymnasium as gym
from gymnasium import spaces
import numpy as np
class EVChargeEnv(gym.Env):
    """
    EV charging environment.

    Goal:
        - Reach full battery (charge = 1.0)
        - Minimize cost
        - Avoid stressing the grid

    Observation:
        ``[charge_level, price, grid_load, time_step_norm]``, each in [0, 1].

    Action:
        continuous charging rate in [0.0, 1.0] (Box of shape (1,); a bare
        scalar is also accepted and clamped).

    Parameters
    ----------
    max_steps : int
        Episode length before truncation.
    scenario : str
        Difficulty preset: one of ``"easy"``, ``"medium"``, ``"hard"``.

    Raises
    ------
    ValueError
        If ``scenario`` is not a recognized preset.
    """

    metadata = {"render_modes": ["human"]}

    def __init__(self, max_steps: int = 48, scenario: str = "medium"):
        super().__init__()
        # Validate with an exception, not `assert` — asserts are stripped
        # under `python -O`, which would silently accept bad scenarios.
        if scenario not in ("easy", "medium", "hard"):
            raise ValueError(
                f"scenario must be 'easy', 'medium' or 'hard', got {scenario!r}"
            )
        self.scenario = scenario

        # Observation: charge, price, load, normalized time — all in [0, 1].
        self.observation_space = spaces.Box(
            low=np.array([0.0, 0.0, 0.0, 0.0], dtype=np.float32),
            high=np.array([1.0, 1.0, 1.0, 1.0], dtype=np.float32),
            dtype=np.float32,
        )
        # Action: charge rate between 0 and 1.
        self.action_space = spaces.Box(
            low=np.array([0.0], dtype=np.float32),
            high=np.array([1.0], dtype=np.float32),
            dtype=np.float32,
        )

        self.max_steps = max_steps
        self.step_count = 0

        # Internal state (re-randomized in reset()).
        self.charge = 0.0
        self.price = 0.0
        self.grid_load = 0.0

        # Scenario parameters (overwritten in reset() via _set_scenario_params).
        self.base_price = 0.3
        self.base_load = 0.5
        self.load_threshold = 0.8   # above this → overload penalty
        self.charge_rate_scale = 0.08  # how fast the battery fills per unit action

    def _set_scenario_params(self):
        """Set dynamics parameters based on the difficulty scenario."""
        if self.scenario == "easy":
            self.base_price = 0.25
            self.base_load = 0.4
            self.load_threshold = 0.9
            self.charge_rate_scale = 0.10
        elif self.scenario == "medium":
            self.base_price = 0.30
            self.base_load = 0.5
            self.load_threshold = 0.85
            self.charge_rate_scale = 0.08
        else:  # hard
            self.base_price = 0.35
            self.base_load = 0.6
            self.load_threshold = 0.8
            self.charge_rate_scale = 0.06

    def reset(self, seed=None, options=None):
        """Reset the environment and return ``(obs, info)``.

        Uses gymnasium's per-environment RNG (``self.np_random``, seeded by
        ``super().reset(seed=seed)``) rather than the global ``np.random``
        state, so independent env instances are reproducible and do not
        interfere with each other or with user code.
        """
        super().reset(seed=seed)  # seeds self.np_random when seed is given
        self._set_scenario_params()
        self.step_count = 0

        # Random initial charge, slightly low so there is work to do.
        self.charge = float(self.np_random.uniform(0.1, 0.4))
        # Start price/load around base with small noise.
        self.price = float(
            np.clip(self.base_price + self.np_random.normal(0, 0.05), 0.0, 1.0)
        )
        self.grid_load = float(
            np.clip(self.base_load + self.np_random.normal(0, 0.05), 0.0, 1.0)
        )
        return self._get_obs(), {}

    def _get_obs(self):
        """Return the observation vector as a float32 array of shape (4,)."""
        time_step_norm = self.step_count / max(1, self.max_steps - 1)
        return np.array(
            [self.charge, self.price, self.grid_load, time_step_norm],
            dtype=np.float32,
        )

    def step(self, action):
        """Advance one time step.

        Parameters
        ----------
        action : array-like or float
            Charging rate; clamped into [0, 1]. Accepts the Box's (1,)
            array, a 0-d array, or a bare Python float.

        Returns
        -------
        tuple
            ``(obs, reward, terminated, truncated, info)`` where ``info``
            breaks the reward into its components.
        """
        self.step_count += 1

        # Robustly extract a scalar rate: flatten handles (1,) arrays,
        # 0-d arrays, and plain floats alike; then clamp to valid range.
        a = float(np.clip(np.asarray(action, dtype=np.float64).reshape(-1)[0], 0.0, 1.0))

        # --- Dynamics ---
        # Battery charging (linear in action, scaled by scenario).
        self.charge = float(np.clip(self.charge + a * self.charge_rate_scale, 0.0, 1.0))

        # Price & load evolve as mean-reverting noisy processes around
        # their scenario base values (AR(1)-style smoothing).
        self.price = float(
            np.clip(
                self.price * 0.7
                + self.base_price * 0.3
                + self.np_random.normal(0, 0.05),
                0.0,
                1.0,
            )
        )
        self.grid_load = float(
            np.clip(
                self.grid_load * 0.6
                + self.base_load * 0.4
                + self.np_random.normal(0, 0.07),
                0.0,
                1.0,
            )
        )

        # --- Reward ---
        # Progress reward: proportional to energy actually added this step.
        progress = a * self.charge_rate_scale
        progress_reward = progress * 5.0  # scaled up

        # Cost penalty: charging more when the price is high is worse.
        cost_penalty = self.price * a * 4.0

        # Grid overload penalty: charging adds to effective load; only the
        # excess over the scenario threshold is penalized.
        effective_load = self.grid_load + a * 0.2
        overload = max(0.0, effective_load - self.load_threshold)
        overload_penalty = overload * 6.0

        # Small per-step penalty to encourage finishing quickly.
        time_penalty = 0.01

        reward = progress_reward - cost_penalty - overload_penalty - time_penalty

        # Episode ends when (nearly) full; truncates at the step limit.
        terminated = self.charge >= 0.999
        truncated = self.step_count >= self.max_steps

        info = {
            "progress_reward": progress_reward,
            "cost_penalty": cost_penalty,
            "overload_penalty": overload_penalty,
        }
        return self._get_obs(), reward, terminated, truncated, info

    def render(self):
        """Print a one-line human-readable summary of the current state."""
        print(
            f"step={self.step_count} charge={self.charge:.3f} "
            f"price={self.price:.3f} load={self.grid_load:.3f}"
        )