Upload folder using huggingface_hub

Browse files

Files changed (16) hide show

Dockerfile +10 -0
README.md +138 -0
agent/__pycache__/baseline_agent.cpython-312.pyc +0 -0
agent/__pycache__/price_aware_agent.cpython-312.pyc +0 -0
agent/_init_.py +0 -0
agent/baseline_agent.py +5 -0
agent/price_aware_agent.py +44 -0
env/__pycache__/ev_charge_env.cpython-312.pyc +0 -0
env/_init_.py +17 -0
env/ev_charge_env.py +167 -0
evchargeenv_manifest.json +37 -0
openenv.yaml +51 -0
requirements.txt +2 -0
run_evaluation.py +45 -0
run_price_aware_evaluation.py +48 -0
sample_output.json +5 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,10 @@

+FROM python:3.10-slim
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+CMD ["python", "run_evaluation.py"]

README.md ADDED Viewed

	@@ -0,0 +1,138 @@

+<h1 align="center">⚡ EVChargeEnv</h1>
+<p align="center">
+  <img src="assets/evchargeenv-banner.png" width="800" />
+</p>
+<h3 align="center">Green Agent Benchmark for EV Charging Optimization</h3>
+---
+## Overview
+EVChargeEnv is a lightweight, stochastic reinforcement-learning environment designed for the
+AgentX + AgentBeats Competition (Berkeley RDI 2025).
+It simulates:
+- Electric vehicle battery charging
+- Dynamic electricity pricing
+- Fluctuating grid load
+- Continuous control actions
+- Multi-objective tradeoffs (cost vs. speed vs. grid stability)
+---
+## Task Goal
+The purple agent must:
+- Charge the EV battery to full (1.0)
+- Minimize electricity cost
+- Avoid high grid load
+- Adapt to changing conditions
+---
+## State Space (Observation)
+The agent receives:
+charge_level (0-1), price (0-1), grid_load (0-1), time_step_norm (0-1)
+---
+## Action Space
+Continuous charge rate 0.0 → 1.0.
+---
+## Reward Function
+Reward combines (progress is added; the three penalties are subtracted):
+- progress_reward
+- cost_penalty
+- overload_penalty
+- time_penalty
+---
+## Scenarios
+easy / medium / hard difficulty with different volatility and load patterns.
+---
+## Episode Termination
+An episode ends when the battery reaches full charge or the maximum number of steps is reached.
+---
+## Example Agent Behaviors
+Greedy agent = fast but expensive
+Price-aware agent = slower but cheaper
+Random agent = unstable
+---
+## Evaluation Output
+Running:
+python run_evaluation.py
+Generates JSON like:
+{
+"avg_reward": ...,
+"avg_steps": ...,
+"episodes": 5
+}
+---
+## Docker Support
+Image: oozan/evchargeenv:latest
+---
+## File Structure
+env/
+agent/
+run_evaluation.py
+Dockerfile
+requirements.txt
+README.md
+---
+## Future Improvements
+- renewable energy factor
+- blackout events
+- degradation model
+- RL baseline
+- trajectory visualizer
+- mini-game UI
+## Benchmark Specification
+This repository also includes a machine-readable benchmark manifest:
+- `evchargeenv_manifest.json`
+It documents:
+- state and action spaces
+- reward components
+- termination conditions
+- supported scenarios (`easy`, `medium`, `hard`)
+- evaluation output format (JSON fields)
+This makes EVChargeEnv easier to integrate as a standardized benchmark and aligns with the spirit of the OpenEnv challenge: environments that are transparent, reproducible, and extensible.

agent/__pycache__/baseline_agent.cpython-312.pyc ADDED Viewed

Binary file (728 Bytes). View file

agent/__pycache__/price_aware_agent.cpython-312.pyc ADDED Viewed

Binary file (2.02 kB). View file

agent/_init_.py ADDED Viewed

File without changes

agent/baseline_agent.py ADDED Viewed

	@@ -0,0 +1,5 @@

+import numpy as np
class BaselineAgent:
    """Random baseline policy for EVChargeEnv.

    Every action is a single charge rate drawn uniformly from ``[0, 1)``;
    the observation is ignored.

    Args:
        seed: Optional seed for a private random generator. When given, the
            action sequence is reproducible and independent of NumPy's global
            random state. When omitted (the default), actions are drawn from
            the global ``np.random`` state, exactly as before.
    """

    def __init__(self, seed=None):
        # Only create a private generator on request, so unseeded agents keep
        # following ``np.random.seed`` the way the original class did.
        self._rng = None if seed is None else np.random.default_rng(seed)

    def select_action(self, observation):
        """Return a random charge rate as a float32 array of shape ``(1,)``.

        ``observation`` is accepted for interface compatibility only.
        """
        draw = np.random.random() if self._rng is None else self._rng.random()
        return np.array([draw], dtype=np.float32)

agent/price_aware_agent.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import numpy as np
class PriceAwareAgent:
    """
    Rule-based charging policy for EVChargeEnv.

    The charge rate is chosen from a fixed set of levels by checking, in
    order: battery nearly full, grid load high, price low, price high.
    The first rule that matches wins; otherwise a moderate rate is used.
    """

    def __init__(
        self,
        low_price_threshold: float = 0.4,
        high_price_threshold: float = 0.7,
        high_load_threshold: float = 0.8,
    ):
        self.low_price_threshold = low_price_threshold
        self.high_price_threshold = high_price_threshold
        self.high_load_threshold = high_load_threshold

    def select_action(self, observation):
        """
        Map an observation to a charge rate.

        observation = [charge, price, load, time_step_norm]
        returns: float32 np.array([rate]) with rate in {0.0, 0.1, 0.2, 0.5, 0.9}
        """
        battery, tariff, grid, _step_frac = observation

        if battery >= 0.98:
            # Battery is practically full: stop drawing power.
            rate = 0.0
        elif grid >= self.high_load_threshold:
            # Grid is stressed: trickle only.
            rate = 0.1
        elif tariff <= self.low_price_threshold:
            # Cheap electricity: charge hard.
            rate = 0.9
        elif tariff >= self.high_price_threshold:
            # Expensive electricity: keep minimal progress.
            rate = 0.2
        else:
            # Neither cheap nor expensive: middle-of-the-road rate.
            rate = 0.5

        return np.array([rate], dtype=np.float32)

env/__pycache__/ev_charge_env.cpython-312.pyc ADDED Viewed

Binary file (6.78 kB). View file

env/_init_.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from .ev_charge_env import EVChargeEnv
def register_env():
    """
    Try to register EVChargeEnv with OpenEnv under the id "EVChargeEnv-v0".

    ``openenv`` is imported lazily inside the function. If an ImportError is
    raised while importing or registering, the function returns silently;
    otherwise a confirmation line is printed.
    """
    registration = {
        "id": "EVChargeEnv-v0",
        "entry_point": "env.ev_charge_env:EVChargeEnv",
    }
    try:
        import openenv

        openenv.register(**registration)
    except ImportError:
        # OpenEnv is optional: without it there is nothing to register.
        return
    print("EVChargeEnv-v0 registered successfully.")

env/ev_charge_env.py ADDED Viewed

	@@ -0,0 +1,167 @@

+import gymnasium as gym
+from gymnasium import spaces
+import numpy as np
class EVChargeEnv(gym.Env):
    """
    EV charging environment.

    Goal:
      - Reach full battery (charge = 1.0)
      - Minimize cost
      - Avoid stressing the grid

    State (obs):
      [charge_level, price, grid_load, time_step_norm], each in [0, 1]

    Action:
      continuous charging rate in [0.0, 1.0]

    Args:
        max_steps: Number of steps after which an episode is truncated.
        scenario: Difficulty preset, one of "easy", "medium" or "hard".

    Raises:
        ValueError: If ``scenario`` is not one of the known presets.
    """

    metadata = {"render_modes": ["human"]}

    # Difficulty presets accepted by the ``scenario`` argument.
    _SCENARIOS = ("easy", "medium", "hard")

    def __init__(self, max_steps: int = 48, scenario: str = "medium"):
        super().__init__()

        # Validate with an exception instead of ``assert`` so the check is
        # not stripped when Python runs with -O.
        if scenario not in self._SCENARIOS:
            raise ValueError(
                f"scenario must be one of {self._SCENARIOS}, got {scenario!r}"
            )
        self.scenario = scenario

        # Observation: charge, price, load, time (all normalised to [0, 1])
        self.observation_space = spaces.Box(
            low=np.array([0.0, 0.0, 0.0, 0.0], dtype=np.float32),
            high=np.array([1.0, 1.0, 1.0, 1.0], dtype=np.float32),
            dtype=np.float32,
        )

        # Action: charge rate between 0 and 1
        self.action_space = spaces.Box(
            low=np.array([0.0], dtype=np.float32),
            high=np.array([1.0], dtype=np.float32),
            dtype=np.float32,
        )

        self.max_steps = max_steps
        self.step_count = 0

        # Internal state (overwritten by reset)
        self.charge = 0.0
        self.price = 0.0
        self.grid_load = 0.0

        # Apply the preset right away so base_price, base_load,
        # load_threshold and charge_rate_scale already match ``scenario``
        # before the first reset().
        self._set_scenario_params()

    def _set_scenario_params(self):
        """Set price/load baselines, overload threshold and charge speed
        according to ``self.scenario``."""
        if self.scenario == "easy":
            self.base_price = 0.25
            self.base_load = 0.4
            self.load_threshold = 0.9  # above this -> overload penalty
            self.charge_rate_scale = 0.10  # how fast the battery fills
        elif self.scenario == "medium":
            self.base_price = 0.30
            self.base_load = 0.5
            self.load_threshold = 0.85
            self.charge_rate_scale = 0.08
        else:  # hard
            self.base_price = 0.35
            self.base_load = 0.6
            self.load_threshold = 0.8
            self.charge_rate_scale = 0.06

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Args:
            seed: Optional seed; forwarded to ``gym.Env.reset``, which seeds
                ``self.np_random``.
            options: Unused.

        Returns:
            ``(obs, info)`` with an empty ``info`` dict.
        """
        super().reset(seed=seed)

        # All randomness is drawn from the environment's own generator, so
        # seeding an episode neither depends on nor overwrites NumPy's
        # global random state.
        rng = self.np_random

        self._set_scenario_params()
        self.step_count = 0

        # Random initial charge, slightly low
        self.charge = float(rng.uniform(0.1, 0.4))

        # Start price/load around base with small noise
        self.price = float(
            np.clip(self.base_price + rng.normal(0, 0.05), 0.0, 1.0)
        )
        self.grid_load = float(
            np.clip(self.base_load + rng.normal(0, 0.05), 0.0, 1.0)
        )

        return self._get_obs(), {}

    def _get_obs(self):
        """Return the current observation as a float32 array of shape (4,)."""
        # step_count runs up to max_steps while the divisor is max_steps - 1,
        # so the raw ratio exceeds 1 on the last step; cap it to stay inside
        # observation_space.
        time_step_norm = min(1.0, self.step_count / max(1, self.max_steps - 1))
        return np.array(
            [self.charge, self.price, self.grid_load, time_step_norm],
            dtype=np.float32,
        )

    def step(self, action):
        """Advance the simulation by one step.

        Args:
            action: Charge rate; a scalar or any array-like whose first
                element is the rate. Values are clipped to [0, 1].

        Returns:
            ``(obs, reward, terminated, truncated, info)`` where ``info``
            holds the individual reward components.
        """
        self.step_count += 1

        # Accept scalars as well as arrays, then clamp into the valid range
        rate = np.asarray(action, dtype=np.float64).reshape(-1)[0]
        a = float(np.clip(rate, 0.0, 1.0))

        # --- Dynamics ---
        # Battery charging
        self.charge = float(
            np.clip(self.charge + a * self.charge_rate_scale, 0.0, 1.0)
        )

        # Price & load drift back toward their base values with added noise
        rng = self.np_random
        self.price = float(
            np.clip(
                self.price * 0.7
                + self.base_price * 0.3
                + rng.normal(0, 0.05),
                0.0,
                1.0,
            )
        )
        self.grid_load = float(
            np.clip(
                self.grid_load * 0.6
                + self.base_load * 0.4
                + rng.normal(0, 0.07),
                0.0,
                1.0,
            )
        )

        # --- Reward ---
        # NOTE: the penalties below use the price and load *after* this
        # step's update, not the values the agent observed when it chose
        # the action.

        # Progress reward; based on the requested rate, so it is not reduced
        # when the battery saturates at 1.0.
        progress = a * self.charge_rate_scale
        progress_reward = progress * 5.0  # scaled up

        # Cost penalty (higher price * more charging = worse)
        cost_penalty = self.price * a * 4.0

        # Grid overload penalty if we charge too much when load is high
        effective_load = self.grid_load + a * 0.2
        overload = max(0.0, effective_load - self.load_threshold)
        overload_penalty = overload * 6.0

        # Small time penalty to encourage faster completion
        time_penalty = 0.01

        reward = progress_reward - cost_penalty - overload_penalty - time_penalty

        # Episode done?
        terminated = self.charge >= 0.999
        truncated = self.step_count >= self.max_steps

        info = {
            "progress_reward": progress_reward,
            "cost_penalty": cost_penalty,
            "overload_penalty": overload_penalty,
            "time_penalty": time_penalty,
        }
        return self._get_obs(), reward, terminated, truncated, info

    def render(self):
        """Print the step counter, charge, price and grid load on one line."""
        print(
            f"step={self.step_count} charge={self.charge:.3f} "
            f"price={self.price:.3f} load={self.grid_load:.3f}"
        )

evchargeenv_manifest.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "name": "EVChargeEnv",
+  "description": "An EV charging optimization benchmark environment for testing agents under dynamic prices and variable grid load.",
+  "version": "0.1.0",
+  "task_type": "continuous_control",
+  "domain": "energy_ev_charging",
+  "observation_space": {
+    "type": "Box",
+    "shape": [4],
+    "components": [
+      "charge_level (0-1)",
+      "price (0-1)",
+      "grid_load (0-1)",
+      "time_step_norm (0-1)"
+    ]
+  },
+  "action_space": {
+    "type": "Box",
+    "shape": [1],
+    "description": "continuous charging rate in [0, 1]"
+  },
+  "scenarios": ["easy", "medium", "hard"],
+  "reward_components": [
+    "progress_reward (battery increase)",
+    "cost_penalty (price * charge_rate)",
+    "overload_penalty (high grid load + high charging)",
+    "time_penalty (encourages faster completion)"
+  ],
+  "termination_conditions": [
+    "battery full (charge_level >= 0.999)",
+    "maximum step count reached"
+  ],
+  "evaluation_output": {
+    "format": "json",
+    "fields": ["avg_reward", "avg_steps", "episodes"]
+  }
+}

openenv.yaml ADDED Viewed

	@@ -0,0 +1,51 @@

+id: EVChargeEnv-v0
+name: EVChargeEnv
+version: "0.1.0"
+description: >
+  EVChargeEnv is a continuous-control electric vehicle charging environment
+  with dynamic pricing, fluctuating grid load, and multi-objective reward signals.
+  It is suitable for benchmarking agentic behavior and testing adaptation
+  to non-stationary conditions.
+authors:
+  - name: Ozan Özayranci
+    github: "https://github.com/oozan"
+license: mit
+environment:
+  observation_space:
+    shape: [4]
+    type: box
+    description:
+      - charge_level (0–1)
+      - price (0–1)
+      - grid_load (0–1)
+      - time_step_norm (0–1)
+  action_space:
+    shape: [1]
+    type: box
+    description: continuous charge rate (0–1)
+  reward_components:
+    - progress_reward
+    - cost_penalty
+    - overload_penalty
+    - time_penalty
+  termination_conditions:
+    - charge >= 0.999 (treated as full)
+    - max_steps reached
+scenarios:
+  - easy
+  - medium
+  - hard
+entry_point: env.ev_charge_env:EVChargeEnv
+tags:
+  - energy
+  - control
+  - continuous
+  - stochastic
+  - reinforcement-learning
+  - openenv

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


+gymnasium
+numpy

run_evaluation.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import json
+from env.ev_charge_env import EVChargeEnv
+from agent.baseline_agent import BaselineAgent
def run_episode(env, agent, seed=None, step_limit=200):
    """Run one episode and return ``(total_reward, steps)``.

    Args:
        env: Environment whose ``reset(seed=...)`` returns ``(obs, info)``
            and whose ``step(action)`` returns
            ``(obs, reward, terminated, truncated, info)``.
        agent: Object exposing ``select_action(obs)``.
        seed: Optional seed forwarded to ``env.reset`` so an episode can be
            reproduced. ``None`` (the default) leaves the episode unseeded.
        step_limit: Safety cap on the number of steps, applied in addition
            to the environment's own termination/truncation signals.

    Returns:
        The summed reward and the number of steps taken. At least one step
        is always executed.
    """
    obs, _ = env.reset(seed=seed)
    total_reward = 0.0
    steps = 0
    while True:
        action = agent.select_action(obs)
        obs, reward, terminated, truncated, _ = env.step(action)
        total_reward += reward
        steps += 1
        if terminated or truncated or steps >= step_limit:
            break
    return total_reward, steps
def main():
    """Evaluate BaselineAgent on a default EVChargeEnv for five episodes.

    The averaged reward and step count are printed as a single JSON line
    and also written to ``sample_output.json``.
    """
    env = EVChargeEnv()
    agent = BaselineAgent()

    # Each entry is the (total_reward, steps) pair of one episode.
    results = [run_episode(env, agent) for _ in range(5)]
    rewards = [episode_reward for episode_reward, _ in results]
    steps_list = [episode_steps for _, episode_steps in results]

    episode_count = len(rewards)
    output = {
        "avg_reward": sum(rewards) / episode_count,
        "avg_steps": sum(steps_list) / len(steps_list),
        "episodes": episode_count,
    }
    print(json.dumps(output))

    # Keep a copy of the summary on disk
    with open("sample_output.json", "w") as f:
        json.dump(output, f, indent=4)


if __name__ == "__main__":
    main()

run_price_aware_evaluation.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import json
+from env.ev_charge_env import EVChargeEnv
+from agent.price_aware_agent import PriceAwareAgent
def run_episode(env, agent, seed=None):
    """Play one episode and return ``(total_reward, steps)``.

    The environment is reset with ``seed``; the agent then acts on each
    observation until the environment reports termination or truncation.
    """
    observation, _info = env.reset(seed=seed)
    episode_return = 0.0
    n_steps = 0
    finished = False
    while not finished:
        chosen = agent.select_action(observation)
        observation, step_reward, terminated, truncated, _info = env.step(chosen)
        episode_return += step_reward
        n_steps += 1
        finished = terminated or truncated
    return episode_return, n_steps
def main(scenario="medium", num_episodes=10):
    """Evaluate PriceAwareAgent and print a JSON summary.

    Args:
        scenario: Difficulty preset passed to EVChargeEnv
            ("easy" / "medium" / "hard"). The same value is reported in the
            output, so the label can no longer drift from the environment
            that was actually run.
        num_episodes: Number of episodes to average over. Episode ``i`` is
            reset with ``seed=i``.

    Raises:
        ValueError: If ``num_episodes`` is smaller than 1 (the averages
            would otherwise divide by zero).
    """
    if num_episodes < 1:
        raise ValueError("num_episodes must be at least 1")

    env = EVChargeEnv(scenario=scenario)
    agent = PriceAwareAgent()

    rewards = []
    steps_list = []
    for i in range(num_episodes):
        total_reward, steps = run_episode(env, agent, seed=i)
        rewards.append(total_reward)
        steps_list.append(steps)

    output = {
        "agent_type": "price_aware",
        "scenario": scenario,
        "avg_reward": sum(rewards) / len(rewards),
        "avg_steps": sum(steps_list) / len(steps_list),
        "episodes": num_episodes,
    }
    print(json.dumps(output, indent=2))


if __name__ == "__main__":
    main()

sample_output.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+    "avg_reward": -9.145111057869848,
+    "avg_steps": 20.2,
+    "episodes": 5
+}