Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Dockerfile +82 -0
- README.md +152 -5
- __init__.py +17 -0
- client.py +120 -0
- energy_optimization_ppo.zip +3 -0
- gym_wrapper.py +99 -0
- inference.py +234 -0
- models.py +74 -0
- openenv-energy-rl/Dockerfile +5 -0
- openenv-energy-rl/README.md +26 -0
- openenv-energy-rl/environment.py +30 -0
- openenv-energy-rl/inference.py +21 -0
- openenv-energy-rl/requirements.txt +7 -0
- openenv.yaml +7 -0
- openenv_he_demo.egg-info/PKG-INFO +14 -0
- openenv_he_demo.egg-info/SOURCES.txt +25 -0
- openenv_he_demo.egg-info/dependency_links.txt +1 -0
- openenv_he_demo.egg-info/entry_points.txt +2 -0
- openenv_he_demo.egg-info/requires.txt +10 -0
- openenv_he_demo.egg-info/top_level.txt +1 -0
- pyproject.toml +45 -0
- server/__init__.py +11 -0
- server/app.py +80 -0
- server/he_demo_environment.py +318 -0
- server/requirements.txt +6 -0
- test_environment.py +103 -0
- train_agent.py +92 -0
- uv.lock +0 -0
- validate.py +67 -0
Dockerfile
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Multi-stage build using openenv-base
|
| 8 |
+
# This Dockerfile is flexible and works for both:
|
| 9 |
+
# - In-repo environments (with local OpenEnv sources)
|
| 10 |
+
# - Standalone environments (with openenv from PyPI/Git)
|
| 11 |
+
# The build script (openenv build) handles context detection and sets appropriate build args.
|
| 12 |
+
|
| 13 |
+
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 14 |
+
FROM ${BASE_IMAGE} AS builder
|
| 15 |
+
|
| 16 |
+
WORKDIR /app
|
| 17 |
+
|
| 18 |
+
# Ensure git is available (required for installing dependencies from VCS)
|
| 19 |
+
RUN apt-get update && \
|
| 20 |
+
apt-get install -y --no-install-recommends git && \
|
| 21 |
+
rm -rf /var/lib/apt/lists/*
|
| 22 |
+
|
| 23 |
+
# Build argument to control whether we're building standalone or in-repo
|
| 24 |
+
ARG BUILD_MODE=in-repo
|
| 25 |
+
ARG ENV_NAME=he_demo
|
| 26 |
+
|
| 27 |
+
# Copy environment code (always at root of build context)
|
| 28 |
+
COPY . /app/env
|
| 29 |
+
|
| 30 |
+
# For in-repo builds, openenv is already vendored in the build context
|
| 31 |
+
# For standalone builds, openenv will be installed via pyproject.toml
|
| 32 |
+
WORKDIR /app/env
|
| 33 |
+
|
| 34 |
+
# Ensure uv is available (for local builds where base image lacks it)
|
| 35 |
+
RUN if ! command -v uv >/dev/null 2>&1; then \
|
| 36 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
| 37 |
+
mv /root/.local/bin/uv /usr/local/bin/uv && \
|
| 38 |
+
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 39 |
+
fi
|
| 40 |
+
|
| 41 |
+
# Install dependencies using uv sync
|
| 42 |
+
# If uv.lock exists, use it; otherwise resolve on the fly
|
| 43 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 44 |
+
if [ -f uv.lock ]; then \
|
| 45 |
+
uv sync --frozen --no-install-project --no-editable; \
|
| 46 |
+
else \
|
| 47 |
+
uv sync --no-install-project --no-editable; \
|
| 48 |
+
fi
|
| 49 |
+
|
| 50 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 51 |
+
if [ -f uv.lock ]; then \
|
| 52 |
+
uv sync --frozen --no-editable; \
|
| 53 |
+
else \
|
| 54 |
+
uv sync --no-editable; \
|
| 55 |
+
fi
|
| 56 |
+
|
| 57 |
+
# Final runtime stage
|
| 58 |
+
FROM ${BASE_IMAGE}
|
| 59 |
+
|
| 60 |
+
WORKDIR /app
|
| 61 |
+
|
| 62 |
+
# Copy the virtual environment from builder
|
| 63 |
+
COPY --from=builder /app/env/.venv /app/.venv
|
| 64 |
+
|
| 65 |
+
# Copy the environment code
|
| 66 |
+
COPY --from=builder /app/env /app/env
|
| 67 |
+
|
| 68 |
+
# Set PATH to use the virtual environment
|
| 69 |
+
ENV PATH="/app/.venv/bin:$PATH"
|
| 70 |
+
|
| 71 |
+
# Set PYTHONPATH so imports work correctly
|
| 72 |
+
ENV PYTHONPATH="/app:$PYTHONPATH"
|
| 73 |
+
|
| 74 |
+
ENV ENABLE_WEB_INTERFACE=true
|
| 75 |
+
|
| 76 |
+
# Health check
|
| 77 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 78 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 79 |
+
|
| 80 |
+
# Run the FastAPI server
|
| 81 |
+
# The module path is constructed to work with the /app/env structure
|
| 82 |
+
CMD ["sh", "-c", "cd /app/env && uvicorn he_demo.server.app:app --host 0.0.0.0 --port 8000"]
|
README.md
CHANGED
|
@@ -1,10 +1,157 @@
|
|
| 1 |
---
|
| 2 |
-
title: Energy Optimization
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Energy & Memory RAM Optimization Environment
|
| 3 |
+
emoji: ⚡
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
+
- reinforcement-learning
|
| 13 |
+
- energy-optimization
|
| 14 |
+
- resource-management
|
| 15 |
---
|
| 16 |
|
| 17 |
+
# Energy & Memory RAM Optimization RL Environment
|
| 18 |
+
|
| 19 |
+
An OpenEnv-based reinforcement learning environment for training AI agents to optimize energy consumption and RAM usage in computer systems. The environment features tasks of increasing difficulty, automated graders for task completion verification, and sophisticated reward logic.
|
| 20 |
+
|
| 21 |
+
## Features
|
| 22 |
+
|
| 23 |
+
### AI Agent Capabilities
|
| 24 |
+
- **Resource Detection**: Real-time monitoring of RAM usage and energy consumption
|
| 25 |
+
- **Optimization Strategies**: Multiple action types for different optimization approaches
|
| 26 |
+
- **Adaptive Learning**: Agents learn to balance competing objectives (RAM vs energy efficiency)
|
| 27 |
+
|
| 28 |
+
### Task Progression
|
| 29 |
+
Tasks increase in difficulty from basic resource reduction to advanced multi-objective optimization:
|
| 30 |
+
|
| 31 |
+
1. **Basic RAM Reduction**: Reduce RAM usage below 70%
|
| 32 |
+
2. **Energy Optimization**: Reduce energy consumption below 6 kWh while maintaining RAM below 75%
|
| 33 |
+
3. **Balanced Optimization**: Balance RAM below 60% and energy below 5 kWh
|
| 34 |
+
4. **Advanced Efficiency**: Achieve RAM below 50% and energy below 4 kWh
|
| 35 |
+
5. **Expert Optimization**: Master level: RAM below 40% and energy below 3 kWh
|
| 36 |
+
|
| 37 |
+
### Automated Graders
|
| 38 |
+
- **Task Completion Verification**: Automatic checking of optimization targets
|
| 39 |
+
- **Performance Metrics**: Efficiency scores and progress tracking
|
| 40 |
+
- **Reward Validation**: Ensures fair scoring based on actual improvements
|
| 41 |
+
|
| 42 |
+
### Reward Logic
|
| 43 |
+
- **Action Effectiveness**: Rewards based on actual resource reductions achieved
|
| 44 |
+
- **Task Completion Bonuses**: Significant rewards for meeting task objectives
|
| 45 |
+
- **Efficiency Incentives**: Bonuses for overall system optimization
|
| 46 |
+
- **Penalty System**: Penalties for aggressive actions that may cause system instability
|
| 47 |
+
|
| 48 |
+
## Quick Start
|
| 49 |
+
|
| 50 |
+
### Installation
|
| 51 |
+
```bash
|
| 52 |
+
# Install dependencies
|
| 53 |
+
pip install -r requirements.txt
|
| 54 |
+
|
| 55 |
+
# Or using uv (recommended)
|
| 56 |
+
uv sync
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
### Running the Environment
|
| 60 |
+
```bash
|
| 61 |
+
# Start the OpenEnv server
|
| 62 |
+
uv run server
|
| 63 |
+
|
| 64 |
+
# The server will be available at http://localhost:8000
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
### Training an Agent
|
| 68 |
+
```python
|
| 69 |
+
from stable_baselines3 import PPO
|
| 70 |
+
from openenv.client import OpenEnvClient
|
| 71 |
+
|
| 72 |
+
# Connect to the environment
|
| 73 |
+
client = OpenEnvClient("http://localhost:8000")
|
| 74 |
+
|
| 75 |
+
# Create and train agent
|
| 76 |
+
model = PPO("MlpPolicy", client, verbose=1)
|
| 77 |
+
model.learn(total_timesteps=10000)
|
| 78 |
+
|
| 79 |
+
# Evaluate the trained agent
|
| 80 |
+
obs = client.reset()
|
| 81 |
+
total_reward = 0
|
| 82 |
+
while not obs.done:
|
| 83 |
+
action, _ = model.predict(obs)
|
| 84 |
+
obs = client.step(action)
|
| 85 |
+
total_reward += obs.reward
|
| 86 |
+
print(f"Step reward: {obs.reward:.2f}, Total: {total_reward:.2f}")
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
## Docker
|
| 90 |
+
|
| 91 |
+
```bash
|
| 92 |
+
# Build the container
|
| 93 |
+
docker build -t energy-optimization-rl .
|
| 94 |
+
|
| 95 |
+
# Run the environment
|
| 96 |
+
docker run --rm -p 8000:8000 energy-optimization-rl
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
## Environment Details
|
| 100 |
+
|
| 101 |
+
### State Space
|
| 102 |
+
- RAM usage percentage (0-100%)
|
| 103 |
+
- Energy consumption in kWh
|
| 104 |
+
- System load (0-1)
|
| 105 |
+
- Current task information
|
| 106 |
+
- Task completion progress
|
| 107 |
+
- Efficiency scores
|
| 108 |
+
|
| 109 |
+
### Action Space
|
| 110 |
+
- `reduce_ram`: Focus on RAM optimization with configurable intensity (0.0-1.0)
|
| 111 |
+
- `optimize_energy`: Focus on energy reduction with configurable intensity (0.0-1.0)
|
| 112 |
+
- `balance_resources`: Balanced approach to both resources
|
| 113 |
+
- `monitor_system`: Gather system information and slight load reduction
|
| 114 |
+
|
| 115 |
+
### Reward Structure
|
| 116 |
+
- Base rewards for resource reductions
|
| 117 |
+
- Task completion bonuses (difficulty × 10 points)
|
| 118 |
+
- Efficiency improvement bonuses
|
| 119 |
+
- Penalties for system instability from aggressive actions
|
| 120 |
+
|
| 121 |
+
## API Endpoints
|
| 122 |
+
|
| 123 |
+
- `POST /reset`: Reset the environment
|
| 124 |
+
- `POST /step`: Execute an optimization action
|
| 125 |
+
- `GET /state`: Get current environment state
|
| 126 |
+
- `GET /schema`: Get action/observation schemas
|
| 127 |
+
- `WS /ws`: WebSocket endpoint for persistent sessions
|
| 128 |
+
|
| 129 |
+
## Development
|
| 130 |
+
|
| 131 |
+
### Project Structure
|
| 132 |
+
```
|
| 133 |
+
he_demo/
|
| 134 |
+
├── models.py # Action and observation definitions
|
| 135 |
+
├── server/
|
| 136 |
+
│ ├── app.py # FastAPI server application
|
| 137 |
+
│ └── he_demo_environment.py # Environment implementation
|
| 138 |
+
├── client.py # Example client code
|
| 139 |
+
├── inference.py # Training and inference scripts
|
| 140 |
+
├── Dockerfile # Container configuration
|
| 141 |
+
├── pyproject.toml # Project dependencies
|
| 142 |
+
└── README.md # This file
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
### Adding New Tasks
|
| 146 |
+
Tasks are defined in the `_create_tasks()` method of `EnergyOptimizationEnvironment`. Each task includes:
|
| 147 |
+
- Name and description
|
| 148 |
+
- Difficulty level
|
| 149 |
+
- RAM and energy targets
|
| 150 |
+
- Maximum steps allowed
|
| 151 |
+
|
| 152 |
+
### Customizing Reward Logic
|
| 153 |
+
Modify the `_calculate_reward()` method to implement custom reward strategies based on your specific optimization goals.
|
| 154 |
+
|
| 155 |
+
## License
|
| 156 |
+
|
| 157 |
+
This project is licensed under the BSD-style license. See LICENSE file for details.
|
__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Energy & Memory RAM Optimization Environment."""
|
| 8 |
+
|
| 9 |
+
from .client import EnergyOptimizationEnv
|
| 10 |
+
from .models import EnergyOptimizationAction, EnergyOptimizationObservation, Task
|
| 11 |
+
|
| 12 |
+
__all__ = [
|
| 13 |
+
"EnergyOptimizationAction",
|
| 14 |
+
"EnergyOptimizationObservation",
|
| 15 |
+
"Task",
|
| 16 |
+
"EnergyOptimizationEnv",
|
| 17 |
+
]
|
client.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""He Demo Environment Client."""
|
| 8 |
+
|
| 9 |
+
from typing import Dict
|
| 10 |
+
|
| 11 |
+
from openenv.core import EnvClient
|
| 12 |
+
from openenv.core.client_types import StepResult
|
| 13 |
+
from openenv.core.env_server.types import State
|
| 14 |
+
|
| 15 |
+
from .models import EnergyOptimizationAction, EnergyOptimizationObservation, Task
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class EnergyOptimizationEnv(
|
| 19 |
+
EnvClient[EnergyOptimizationAction, EnergyOptimizationObservation, State]
|
| 20 |
+
):
|
| 21 |
+
"""
|
| 22 |
+
Client for the Energy & Memory RAM Optimization Environment.
|
| 23 |
+
|
| 24 |
+
This client maintains a persistent WebSocket connection to the environment server,
|
| 25 |
+
enabling efficient multi-step interactions with lower latency.
|
| 26 |
+
Each client instance has its own dedicated environment session on the server.
|
| 27 |
+
|
| 28 |
+
Example:
|
| 29 |
+
>>> # Connect to a running server
|
| 30 |
+
>>> with EnergyOptimizationEnv(base_url="http://localhost:8000") as client:
|
| 31 |
+
... result = client.reset()
|
| 32 |
+
... print(f"RAM: {result.observation.ram_usage:.1f}%, Energy: {result.observation.energy_consumption:.1f} kWh")
|
| 33 |
+
...
|
| 34 |
+
... result = client.step(EnergyOptimizationAction(action_type="reduce_ram", intensity=0.8))
|
| 35 |
+
... print(f"Task: {result.observation.current_task.name if result.observation.current_task else 'None'}")
|
| 36 |
+
|
| 37 |
+
Example with Docker:
|
| 38 |
+
>>> # Automatically start container and connect
|
| 39 |
+
>>> client = EnergyOptimizationEnv.from_docker_image("energy-optimization-env:latest")
|
| 40 |
+
>>> try:
|
| 41 |
+
... result = client.reset()
|
| 42 |
+
... result = client.step(EnergyOptimizationAction(action_type="balance_resources", intensity=0.6))
|
| 43 |
+
... finally:
|
| 44 |
+
... client.close()
|
| 45 |
+
"""
|
| 46 |
+
|
| 47 |
+
def _step_payload(self, action: EnergyOptimizationAction) -> Dict:
|
| 48 |
+
"""
|
| 49 |
+
Convert EnergyOptimizationAction to JSON payload for step message.
|
| 50 |
+
|
| 51 |
+
Args:
|
| 52 |
+
action: EnergyOptimizationAction instance
|
| 53 |
+
|
| 54 |
+
Returns:
|
| 55 |
+
Dictionary representation suitable for JSON encoding
|
| 56 |
+
"""
|
| 57 |
+
return {
|
| 58 |
+
"action_type": action.action_type,
|
| 59 |
+
"intensity": action.intensity,
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
def _parse_result(self, payload: Dict) -> StepResult[EnergyOptimizationObservation]:
|
| 63 |
+
"""
|
| 64 |
+
Parse server response into StepResult[EnergyOptimizationObservation].
|
| 65 |
+
|
| 66 |
+
Args:
|
| 67 |
+
payload: JSON response data from server
|
| 68 |
+
|
| 69 |
+
Returns:
|
| 70 |
+
StepResult with EnergyOptimizationObservation
|
| 71 |
+
"""
|
| 72 |
+
obs_data = payload.get("observation", {})
|
| 73 |
+
|
| 74 |
+
# Parse current task if present
|
| 75 |
+
current_task = None
|
| 76 |
+
if obs_data.get("current_task"):
|
| 77 |
+
task_data = obs_data["current_task"]
|
| 78 |
+
current_task = Task(
|
| 79 |
+
name=task_data.get("name", ""),
|
| 80 |
+
description=task_data.get("description", ""),
|
| 81 |
+
difficulty=task_data.get("difficulty", 1),
|
| 82 |
+
ram_target=task_data.get("ram_target", 100.0),
|
| 83 |
+
energy_target=task_data.get("energy_target", 10.0),
|
| 84 |
+
max_steps=task_data.get("max_steps", 10)
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
observation = EnergyOptimizationObservation(
|
| 88 |
+
ram_usage=obs_data.get("ram_usage", 0.0),
|
| 89 |
+
energy_consumption=obs_data.get("energy_consumption", 0.0),
|
| 90 |
+
system_load=obs_data.get("system_load", 0.0),
|
| 91 |
+
current_task=current_task,
|
| 92 |
+
tasks_completed=obs_data.get("tasks_completed", []),
|
| 93 |
+
steps_taken=obs_data.get("steps_taken", 0),
|
| 94 |
+
task_progress=obs_data.get("task_progress", 0.0),
|
| 95 |
+
efficiency_score=obs_data.get("efficiency_score", 0.0),
|
| 96 |
+
done=payload.get("done", False),
|
| 97 |
+
reward=payload.get("reward"),
|
| 98 |
+
metadata=obs_data.get("metadata", {}),
|
| 99 |
+
)
|
| 100 |
+
|
| 101 |
+
return StepResult(
|
| 102 |
+
observation=observation,
|
| 103 |
+
reward=payload.get("reward"),
|
| 104 |
+
done=payload.get("done", False),
|
| 105 |
+
)
|
| 106 |
+
|
| 107 |
+
def _parse_state(self, payload: Dict) -> State:
|
| 108 |
+
"""
|
| 109 |
+
Parse server response into State object.
|
| 110 |
+
|
| 111 |
+
Args:
|
| 112 |
+
payload: JSON response from state request
|
| 113 |
+
|
| 114 |
+
Returns:
|
| 115 |
+
State object with episode_id and step_count
|
| 116 |
+
"""
|
| 117 |
+
return State(
|
| 118 |
+
episode_id=payload.get("episode_id"),
|
| 119 |
+
step_count=payload.get("step_count", 0),
|
| 120 |
+
)
|
energy_optimization_ppo.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c41d1d22fd926eb568f72d6e03fc4eed49133f27371d2899e7d42264b0d4291
|
| 3 |
+
size 146140
|
gym_wrapper.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Gym wrapper for the Energy Optimization Environment.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import sys
|
| 7 |
+
import os
|
| 8 |
+
import gymnasium as gym
|
| 9 |
+
import numpy as np
|
| 10 |
+
sys.path.insert(0, os.path.dirname(__file__))
|
| 11 |
+
|
| 12 |
+
# Mock the he_demo package
|
| 13 |
+
import types
|
| 14 |
+
he_demo = types.ModuleType('he_demo')
|
| 15 |
+
from models import EnergyOptimizationAction, EnergyOptimizationObservation, Task, TaskSummary
|
| 16 |
+
he_demo.EnergyOptimizationAction = EnergyOptimizationAction
|
| 17 |
+
he_demo.EnergyOptimizationObservation = EnergyOptimizationObservation
|
| 18 |
+
he_demo.Task = Task
|
| 19 |
+
he_demo.TaskSummary = TaskSummary
|
| 20 |
+
sys.modules['he_demo'] = he_demo
|
| 21 |
+
sys.modules['he_demo.models'] = he_demo
|
| 22 |
+
|
| 23 |
+
from server.he_demo_environment import EnergyOptimizationEnvironment
|
| 24 |
+
|
| 25 |
+
class EnergyOptimizationGymEnv(gym.Env):
|
| 26 |
+
"""Gym wrapper for the Energy Optimization Environment."""
|
| 27 |
+
|
| 28 |
+
def __init__(self):
|
| 29 |
+
super().__init__()
|
| 30 |
+
|
| 31 |
+
# Create the underlying environment
|
| 32 |
+
self.env = EnergyOptimizationEnvironment()
|
| 33 |
+
|
| 34 |
+
# Define action and observation spaces
|
| 35 |
+
# Actions: [action_type_index, intensity]
|
| 36 |
+
# action_type_index: 0=reduce_ram, 1=optimize_energy, 2=balance_resources, 3=monitor_system
|
| 37 |
+
self.action_space = gym.spaces.Box(
|
| 38 |
+
low=np.array([0, 0.0]),
|
| 39 |
+
high=np.array([3, 1.0]),
|
| 40 |
+
dtype=np.float32
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
# Observations: [ram_usage, energy_consumption, system_load, task_progress, efficiency_score, steps_taken]
|
| 44 |
+
self.observation_space = gym.spaces.Box(
|
| 45 |
+
low=np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0]),
|
| 46 |
+
high=np.array([100.0, 10.0, 1.0, 1.0, 1.0, 100]),
|
| 47 |
+
dtype=np.float32
|
| 48 |
+
)
|
| 49 |
+
|
| 50 |
+
def reset(self, **kwargs):
|
| 51 |
+
"""Reset the environment."""
|
| 52 |
+
obs = self.env.reset()
|
| 53 |
+
return self._obs_to_array(obs), {}
|
| 54 |
+
|
| 55 |
+
def step(self, action):
|
| 56 |
+
"""Execute an action in the environment."""
|
| 57 |
+
# Convert action array to EnergyOptimizationAction
|
| 58 |
+
action_type_index = int(action[0])
|
| 59 |
+
intensity = float(action[1])
|
| 60 |
+
|
| 61 |
+
action_types = ["reduce_ram", "optimize_energy", "balance_resources", "monitor_system"]
|
| 62 |
+
action_type = action_types[action_type_index]
|
| 63 |
+
|
| 64 |
+
action_obj = EnergyOptimizationAction(action_type=action_type, intensity=intensity)
|
| 65 |
+
obs = self.env.step(action_obj)
|
| 66 |
+
|
| 67 |
+
# Convert observation to array
|
| 68 |
+
obs_array = self._obs_to_array(obs)
|
| 69 |
+
|
| 70 |
+
# Check if episode is done
|
| 71 |
+
done = obs.done
|
| 72 |
+
|
| 73 |
+
# Return reward
|
| 74 |
+
reward = obs.reward
|
| 75 |
+
|
| 76 |
+
return obs_array, reward, done, False, {}
|
| 77 |
+
|
| 78 |
+
def _obs_to_array(self, obs):
|
| 79 |
+
"""Convert EnergyOptimizationObservation to numpy array."""
|
| 80 |
+
return np.array([
|
| 81 |
+
obs.ram_usage,
|
| 82 |
+
obs.energy_consumption,
|
| 83 |
+
obs.system_load,
|
| 84 |
+
obs.task_progress,
|
| 85 |
+
obs.efficiency_score,
|
| 86 |
+
obs.steps_taken
|
| 87 |
+
], dtype=np.float32)
|
| 88 |
+
|
| 89 |
+
def render(self, mode="human"):
|
| 90 |
+
"""Render the environment."""
|
| 91 |
+
obs = self.env._get_current_observation()
|
| 92 |
+
if obs:
|
| 93 |
+
print(f"RAM: {obs.ram_usage:.1f}%, Energy: {obs.energy_consumption:.1f}kWh, "
|
| 94 |
+
f"Task: {obs.current_task.name if obs.current_task else 'None'}, "
|
| 95 |
+
f"Progress: {obs.task_progress:.2f}")
|
| 96 |
+
|
| 97 |
+
def close(self):
|
| 98 |
+
"""Close the environment."""
|
| 99 |
+
pass
|
inference.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Energy & Memory RAM Optimization Inference Script
|
| 3 |
+
=================================================
|
| 4 |
+
This script demonstrates how an AI agent can learn to optimize energy consumption
|
| 5 |
+
and RAM usage through reinforcement learning in the Energy Optimization Environment.
|
| 6 |
+
|
| 7 |
+
The agent uses an LLM to make strategic decisions about resource optimization actions.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
import textwrap
|
| 12 |
+
from typing import List, Optional
|
| 13 |
+
|
| 14 |
+
from openai import OpenAI
|
| 15 |
+
|
| 16 |
+
from he_demo.client import EnergyOptimizationEnv
|
| 17 |
+
from he_demo.models import EnergyOptimizationAction
|
| 18 |
+
|
| 19 |
+
IMAGE_NAME = os.getenv("IMAGE_NAME")
|
| 20 |
+
API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
|
| 21 |
+
|
| 22 |
+
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
|
| 23 |
+
MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
|
| 24 |
+
TASK_NAME = os.getenv("ENERGY_TASK", "energy_optimization")
|
| 25 |
+
BENCHMARK = os.getenv("ENERGY_BENCHMARK", "energy_optimization")
|
| 26 |
+
MAX_STEPS = 50 # More steps for complex optimization tasks
|
| 27 |
+
TEMPERATURE = 0.3 # Lower temperature for more consistent optimization decisions
|
| 28 |
+
MAX_TOKENS = 100
|
| 29 |
+
SUCCESS_SCORE_THRESHOLD = 0.5 # Higher threshold for meaningful optimization
|
| 30 |
+
|
| 31 |
+
# Max possible reward: task completion bonuses + efficiency improvements
|
| 32 |
+
MAX_TOTAL_REWARD = 100.0 # Estimated maximum possible reward
|
| 33 |
+
|
| 34 |
+
SYSTEM_PROMPT = textwrap.dedent(
|
| 35 |
+
"""
|
| 36 |
+
You are an AI system optimization agent. Your goal is to optimize computer system resources:
|
| 37 |
+
- Reduce RAM usage (target: below 40%)
|
| 38 |
+
- Minimize energy consumption (target: below 3 kWh)
|
| 39 |
+
- Complete optimization tasks efficiently
|
| 40 |
+
|
| 41 |
+
Available actions:
|
| 42 |
+
- reduce_ram: Focus on RAM optimization (intensity 0.0-1.0)
|
| 43 |
+
- optimize_energy: Focus on energy reduction (intensity 0.0-1.0)
|
| 44 |
+
- balance_resources: Balanced approach to both resources
|
| 45 |
+
- monitor_system: Gather system information
|
| 46 |
+
|
| 47 |
+
Action format: action_type,intensity
|
| 48 |
+
Example: reduce_ram,0.8
|
| 49 |
+
|
| 50 |
+
Consider current system state, task requirements, and potential trade-offs.
|
| 51 |
+
Reply with exactly one action in the format: action_type,intensity
|
| 52 |
+
"""
|
| 53 |
+
).strip()
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def log_start(task: str, env: str, model: str) -> None:
|
| 57 |
+
print(f"[START] task={task} env={env} model={model}", flush=True)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def log_step(
|
| 61 |
+
step: int, action: str, reward: float, done: bool, error: Optional[str]
|
| 62 |
+
) -> None:
|
| 63 |
+
error_val = error if error else "null"
|
| 64 |
+
done_val = str(done).lower()
|
| 65 |
+
print(
|
| 66 |
+
f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
|
| 67 |
+
flush=True,
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
|
| 72 |
+
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 73 |
+
print(
|
| 74 |
+
f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}",
|
| 75 |
+
flush=True,
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def build_user_prompt(
|
| 80 |
+
step: int, observation, last_reward: float, history: List[str]
|
| 81 |
+
) -> str:
|
| 82 |
+
current_task_info = ""
|
| 83 |
+
if observation.current_task:
|
| 84 |
+
task = observation.current_task
|
| 85 |
+
current_task_info = f"""
|
| 86 |
+
Current Task: {task.name}
|
| 87 |
+
Description: {task.description}
|
| 88 |
+
Targets: RAM < {task.ram_target}%, Energy < {task.energy_target} kWh
|
| 89 |
+
Max Steps: {task.max_steps}
|
| 90 |
+
"""
|
| 91 |
+
|
| 92 |
+
history_block = "\n".join(history[-3:]) if history else "None"
|
| 93 |
+
|
| 94 |
+
return textwrap.dedent(
|
| 95 |
+
f"""
|
| 96 |
+
Step: {step}
|
| 97 |
+
System State:
|
| 98 |
+
- RAM Usage: {observation.ram_usage:.1f}%
|
| 99 |
+
- Energy Consumption: {observation.energy_consumption:.1f} kWh
|
| 100 |
+
- System Load: {observation.system_load:.2f}
|
| 101 |
+
- Efficiency Score: {observation.efficiency_score:.2f}
|
| 102 |
+
- Task Progress: {observation.task_progress:.2f}
|
| 103 |
+
- Steps Taken: {observation.steps_taken}
|
| 104 |
+
|
| 105 |
+
{current_task_info}
|
| 106 |
+
Tasks Completed: {', '.join(observation.tasks_completed) if observation.tasks_completed else 'None'}
|
| 107 |
+
|
| 108 |
+
Last Reward: {last_reward:.2f}
|
| 109 |
+
Recent Actions:
|
| 110 |
+
{history_block}
|
| 111 |
+
|
| 112 |
+
Choose your next optimization action (action_type,intensity):
|
| 113 |
+
"""
|
| 114 |
+
).strip()
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def parse_action(action_str: str) -> EnergyOptimizationAction:
|
| 118 |
+
"""Parse action string into EnergyOptimizationAction."""
|
| 119 |
+
try:
|
| 120 |
+
parts = action_str.strip().split(',')
|
| 121 |
+
if len(parts) != 2:
|
| 122 |
+
raise ValueError("Invalid action format")
|
| 123 |
+
|
| 124 |
+
action_type = parts[0].strip()
|
| 125 |
+
intensity = float(parts[1].strip())
|
| 126 |
+
|
| 127 |
+
# Validate action type
|
| 128 |
+
valid_actions = ["reduce_ram", "optimize_energy", "balance_resources", "monitor_system"]
|
| 129 |
+
if action_type not in valid_actions:
|
| 130 |
+
action_type = "monitor_system" # Default fallback
|
| 131 |
+
|
| 132 |
+
# Clamp intensity to valid range
|
| 133 |
+
intensity = max(0.0, min(1.0, intensity))
|
| 134 |
+
|
| 135 |
+
return EnergyOptimizationAction(action_type=action_type, intensity=intensity)
|
| 136 |
+
except Exception:
|
| 137 |
+
# Return safe default action
|
| 138 |
+
return EnergyOptimizationAction(action_type="monitor_system", intensity=0.5)
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def get_model_action(
|
| 142 |
+
client: OpenAI, step: int, observation, last_reward: float, history: List[str]
|
| 143 |
+
) -> EnergyOptimizationAction:
|
| 144 |
+
"""Get optimization action from the language model."""
|
| 145 |
+
user_prompt = build_user_prompt(step, observation, last_reward, history)
|
| 146 |
+
try:
|
| 147 |
+
completion = client.chat.completions.create(
|
| 148 |
+
model=MODEL_NAME,
|
| 149 |
+
messages=[
|
| 150 |
+
{"role": "system", "content": SYSTEM_PROMPT},
|
| 151 |
+
{"role": "user", "content": user_prompt},
|
| 152 |
+
],
|
| 153 |
+
temperature=TEMPERATURE,
|
| 154 |
+
max_tokens=MAX_TOKENS,
|
| 155 |
+
stream=False,
|
| 156 |
+
)
|
| 157 |
+
action_text = (completion.choices[0].message.content or "").strip()
|
| 158 |
+
return parse_action(action_text)
|
| 159 |
+
except Exception as exc:
|
| 160 |
+
print(f"[DEBUG] Model request failed: {exc}", flush=True)
|
| 161 |
+
return EnergyOptimizationAction(action_type="monitor_system", intensity=0.5)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def main() -> None:
    """Run one evaluation episode: the LLM drives the optimization env.

    Creates the OpenAI-compatible client and the environment, loops up to
    MAX_STEPS asking the model for an action and applying it, then scores
    the episode from task completion and efficiency. The environment is
    always closed (and the run logged) in the ``finally`` block.
    """
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    # Launch a Docker-backed environment when an image is configured,
    # otherwise talk to an already-running local server.
    env = (
        EnergyOptimizationEnv.from_docker_image(IMAGE_NAME)
        if IMAGE_NAME
        else EnergyOptimizationEnv(base_url="http://localhost:8000")
    )

    history: List[str] = []  # per-step summaries fed back into the model prompt
    rewards: List[float] = []
    steps_taken = 0
    score = 0.0
    success = False

    log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)

    try:
        result = env.reset()
        last_reward = 0.0

        for step in range(1, MAX_STEPS + 1):
            if result.done:
                break

            # Get action from model
            action = get_model_action(client, step, result.observation, last_reward, history)

            # Execute action
            result = env.step(action)
            obs = result.observation

            reward = result.reward or 0.0
            done = result.done
            error = None

            # Format action for logging
            action_str = f"{action.action_type},{action.intensity:.1f}"

            rewards.append(reward)
            steps_taken = step
            last_reward = reward

            log_step(step=step, action=action_str, reward=reward, done=done, error=error)

            # Update history
            history.append(f"Step {step}: {action_str} -> reward {reward:+.2f}")

            if done:
                break

        # Calculate final score based on tasks completed and efficiency
        # NOTE(review): total_reward is computed but never used below — confirm
        # whether it should feed into the score or be removed.
        total_reward = sum(rewards)
        tasks_completed = len(result.observation.tasks_completed) if result.observation.tasks_completed else 0
        efficiency_score = result.observation.efficiency_score

        # Score combines task completion (60%) and efficiency (40%);
        # 5.0 is the total number of tasks defined by the environment.
        score = (tasks_completed / 5.0) * 0.6 + (efficiency_score / 1.0) * 0.4
        score = min(max(score, 0.0), 1.0)  # clamp to [0, 1]
        success = score >= SUCCESS_SCORE_THRESHOLD

    finally:
        # Best-effort container cleanup: never let a close() failure mask
        # the episode result.
        try:
            env.close()
        except Exception as e:
            print(f"[DEBUG] env.close() error (container cleanup): {e}", flush=True)
        log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
if __name__ == "__main__":
|
| 234 |
+
main()
|
models.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Data models for the Energy & Memory RAM Optimization Environment.
|
| 9 |
+
|
| 10 |
+
This environment simulates system resource optimization tasks where an AI agent
|
| 11 |
+
must optimize RAM usage and energy consumption through various actions.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from typing import List, Optional
|
| 15 |
+
from openenv.core.env_server.types import Action, Observation
|
| 16 |
+
from pydantic import BaseModel, Field
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class EnergyOptimizationAction(Action):
    """Action for the Energy & Memory RAM Optimization environment.

    One action = one optimization strategy plus how aggressively to apply it.
    """

    # Which optimization strategy the agent applies this step.
    action_type: str = Field(
        ...,
        description="Type of optimization action: 'reduce_ram', 'optimize_energy', 'balance_resources', 'monitor_system'"
    )
    # NOTE(review): the 0.0-1.0 range is documented but not validated here;
    # the client-side parser clamps, but server-side callers may not — confirm
    # whether pydantic bounds (ge/le) should be added.
    intensity: float = Field(
        1.0,
        description="Intensity of the action (0.0 to 1.0), affects effectiveness and potential side effects"
    )
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class Task(BaseModel):
    """Represents an optimization task with difficulty and requirements.

    Internal server-side model; observations expose the serializable
    TaskSummary instead.
    """

    name: str = Field(..., description="Unique name of the task")
    description: str = Field(..., description="Human-readable description of the task")
    difficulty: int = Field(..., description="Difficulty level (1-5)")
    ram_target: float = Field(..., description="Target RAM usage percentage (lower is better)")
    energy_target: float = Field(..., description="Target energy consumption (lower is better)")
    max_steps: int = Field(..., description="Maximum steps allowed to complete the task")
    completed: bool = Field(default=False, description="Whether the task has been completed")

    def check_completion(self, ram_usage: float, energy_consumption: float, steps_taken: int) -> bool:
        """Check if the task is completed based on current system state.

        Args:
            ram_usage: Current RAM usage percentage.
            energy_consumption: Current energy consumption in kWh.
            steps_taken: Steps elapsed so far in the episode.

        Returns:
            True only when both the RAM and energy targets are met within
            the step budget; past the deadline it always returns False.
        """
        if steps_taken > self.max_steps:
            return False
        return ram_usage <= self.ram_target and energy_consumption <= self.energy_target
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class TaskSummary(BaseModel):
    """Serializable task summary exposed in observations.

    A read-only snapshot of a Task plus derived fields (remaining steps,
    progress) that is safe to ship to clients in every observation.
    """

    name: str = Field(..., description="Task identifier")
    description: str = Field(..., description="Task description")
    difficulty: int = Field(..., description="Task difficulty level")
    ram_target: float = Field(..., description="RAM usage target percentage")
    energy_target: float = Field(..., description="Energy consumption target in kWh")
    max_steps: int = Field(..., description="Maximum allowed steps for the task")
    completed: bool = Field(False, description="Whether the task is completed")
    remaining_steps: Optional[int] = Field(None, description="Remaining steps before the task deadline")
    progress: float = Field(..., description="Estimated progress toward task completion (0-1)")
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class EnergyOptimizationObservation(Observation):
    """Observation from the Energy & Memory RAM Optimization environment.

    Snapshot of the simulated system plus task-tracking metadata, returned
    by every reset() and step() call.
    """

    ram_usage: float = Field(..., description="Current RAM usage percentage (0-100)")
    energy_consumption: float = Field(..., description="Current energy consumption in kWh")
    system_load: float = Field(..., description="Overall system load (0-1)")
    current_task: Optional[TaskSummary] = Field(None, description="Current optimization task")
    tasks_completed: List[str] = Field(default_factory=list, description="List of completed task names")
    steps_taken: int = Field(..., description="Number of steps taken in current episode")
    task_progress: float = Field(..., description="Progress towards current task completion (0-1)")
    efficiency_score: float = Field(..., description="Overall efficiency score based on optimization")
|
openenv-energy-rl/Dockerfile
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
WORKDIR /app
|
| 3 |
+
COPY . .
|
| 4 |
+
RUN pip install torch transformers trl gym numpy pandas stable-baselines3
|
| 5 |
+
CMD ["python", "inference.py"]
|
openenv-energy-rl/README.md
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# OpenEnv Energy RL
|
| 2 |
+
|
| 3 |
+
A lightweight RL example environment for energy and memory optimization.
|
| 4 |
+
|
| 5 |
+
## Files
|
| 6 |
+
|
| 7 |
+
- `environment.py`: custom `gym.Env` implementation for RAM and electricity reduction.
|
| 8 |
+
- `inference.py`: trains a PPO agent and runs one episode.
|
| 9 |
+
- `Dockerfile`: containerizes the example.
|
| 10 |
+
- `requirements.txt`: dependency list for the example.
|
| 11 |
+
|
| 12 |
+
## Quick start
|
| 13 |
+
|
| 14 |
+
```bash
|
| 15 |
+
python -m venv venv
|
| 16 |
+
venv\Scripts\activate   # Windows; on macOS/Linux use: source venv/bin/activate
|
| 17 |
+
pip install -r requirements.txt
|
| 18 |
+
python inference.py
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
## Docker
|
| 22 |
+
|
| 23 |
+
```bash
|
| 24 |
+
docker build -t openenv-energy-rl .
|
| 25 |
+
docker run --rm openenv-energy-rl
|
| 26 |
+
```
|
openenv-energy-rl/environment.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gym
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class EnergyEnv(gym.Env):
    """Toy environment where an agent drives RAM and electricity usage down.

    State is ``[ram_percent, electricity_kwh]``. Reward is the negative
    normalized total usage, so lower consumption means higher reward; the
    episode ends once either resource reaches zero.
    """

    def __init__(self):
        super().__init__()
        self.state = [50.0, 5.0]  # [RAM usage %, electricity kWh]
        # 0 = do nothing, 1 = reduce RAM, 2 = reduce electricity
        self.action_space = gym.spaces.Discrete(3)
        self.observation_space = gym.spaces.Box(low=0.0, high=100.0, shape=(2,), dtype=np.float32)

    def reset(self):
        """Restore the initial resource levels and return them as float32."""
        self.state = [50.0, 5.0]
        return np.array(self.state, dtype=np.float32)

    def step(self, action):
        """Apply one action; returns (obs, reward, done, info) — classic gym API."""
        ram, elec = self.state

        # Each reduction action shaves a fixed amount off one resource,
        # floored at zero.
        if action == 1:
            ram = max(0.0, ram - 5.0)
        elif action == 2:
            elec = max(0.0, elec - 1.0)

        # Negative normalized usage: best possible reward is 0 at (0, 0).
        reward = -(ram / 100.0 + elec / 10.0)
        done = ram <= 0.0 or elec <= 0.0

        self.state = [ram, elec]
        observation = np.array(self.state, dtype=np.float32)
        return observation, reward, done, {}

    def render(self, mode="human"):
        print(f"RAM: {self.state[0]:.1f}%, Electricity: {self.state[1]:.1f} kWh")
|
openenv-energy-rl/inference.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from environment import EnergyEnv
|
| 2 |
+
from stable_baselines3 import PPO
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def main():
    """Train a PPO agent on EnergyEnv, then roll out one episode with it.

    Trains for 10k timesteps, then steps the raw environment with the
    learned policy until ``done``, printing each transition.
    """
    env = EnergyEnv()
    # NOTE(review): stable-baselines3 >= 2.0 expects the Gymnasium API
    # (reset -> (obs, info), step -> 5-tuple) while EnergyEnv implements the
    # classic gym 4-tuple API — confirm the installed sb3 version accepts it
    # (or wraps it via shimmy/compatibility layer).
    model = PPO("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=10000)

    obs = env.reset()
    done = False
    step = 0
    while not done:
        # predict() returns (action, hidden_states); states unused for MLP.
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        step += 1
        print(f"Action: {int(action)} | Reward: {reward:.2f} | State: {obs.tolist()}")
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
if __name__ == "__main__":
|
| 21 |
+
main()
|
openenv-energy-rl/requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
transformers
|
| 3 |
+
trl
|
| 4 |
+
gym
|
| 5 |
+
numpy
|
| 6 |
+
pandas
|
| 7 |
+
stable-baselines3
|
openenv.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: energy_optimization
|
| 3 |
+
type: space
|
| 4 |
+
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
+
port: 8000
|
| 7 |
+
|
openenv_he_demo.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: openenv-he_demo
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: He Demo environment for OpenEnv
|
| 5 |
+
Requires-Python: >=3.10
|
| 6 |
+
Requires-Dist: openenv-core[core]>=0.2.2
|
| 7 |
+
Requires-Dist: numpy>=1.19.0
|
| 8 |
+
Requires-Dist: pandas>=1.3.0
|
| 9 |
+
Requires-Dist: gymnasium>=0.29.0
|
| 10 |
+
Requires-Dist: stable-baselines3>=2.0.0
|
| 11 |
+
Requires-Dist: torch>=2.0.0
|
| 12 |
+
Provides-Extra: dev
|
| 13 |
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
| 14 |
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
openenv_he_demo.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
__init__.py
|
| 3 |
+
client.py
|
| 4 |
+
inference.py
|
| 5 |
+
models.py
|
| 6 |
+
pyproject.toml
|
| 7 |
+
test_environment.py
|
| 8 |
+
validate.py
|
| 9 |
+
./__init__.py
|
| 10 |
+
./client.py
|
| 11 |
+
./gym_wrapper.py
|
| 12 |
+
./inference.py
|
| 13 |
+
./models.py
|
| 14 |
+
./test_environment.py
|
| 15 |
+
./train_agent.py
|
| 16 |
+
./validate.py
|
| 17 |
+
openenv_he_demo.egg-info/PKG-INFO
|
| 18 |
+
openenv_he_demo.egg-info/SOURCES.txt
|
| 19 |
+
openenv_he_demo.egg-info/dependency_links.txt
|
| 20 |
+
openenv_he_demo.egg-info/entry_points.txt
|
| 21 |
+
openenv_he_demo.egg-info/requires.txt
|
| 22 |
+
openenv_he_demo.egg-info/top_level.txt
|
| 23 |
+
server/__init__.py
|
| 24 |
+
server/app.py
|
| 25 |
+
server/he_demo_environment.py
|
openenv_he_demo.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
openenv_he_demo.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
server = he_demo.server.app:main
|
openenv_he_demo.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core[core]>=0.2.2
|
| 2 |
+
numpy>=1.19.0
|
| 3 |
+
pandas>=1.3.0
|
| 4 |
+
gymnasium>=0.29.0
|
| 5 |
+
stable-baselines3>=2.0.0
|
| 6 |
+
torch>=2.0.0
|
| 7 |
+
|
| 8 |
+
[dev]
|
| 9 |
+
pytest>=8.0.0
|
| 10 |
+
pytest-cov>=4.0.0
|
openenv_he_demo.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
he_demo
|
pyproject.toml
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
[build-system]
|
| 8 |
+
requires = ["setuptools>=45", "wheel"]
|
| 9 |
+
build-backend = "setuptools.build_meta"
|
| 10 |
+
|
| 11 |
+
[project]
|
| 12 |
+
name = "openenv-he_demo"
|
| 13 |
+
version = "0.1.0"
|
| 14 |
+
description = "He Demo environment for OpenEnv"
|
| 15 |
+
requires-python = ">=3.10"
|
| 16 |
+
dependencies = [
|
| 17 |
+
# Core OpenEnv runtime (provides FastAPI server + HTTP client types)
|
| 18 |
+
# install from github
|
| 19 |
+
# "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
|
| 20 |
+
"openenv-core[core]>=0.2.2",
|
| 21 |
+
# Environment-specific dependencies
|
| 22 |
+
# Add all dependencies needed for your environment here
|
| 23 |
+
# Examples:
|
| 24 |
+
"numpy>=1.19.0",
|
| 25 |
+
"pandas>=1.3.0",
|
| 26 |
+
"gymnasium>=0.29.0",
|
| 27 |
+
"stable-baselines3>=2.0.0",
|
| 28 |
+
"torch>=2.0.0",
|
| 29 |
+
]
|
| 30 |
+
|
| 31 |
+
[project.optional-dependencies]
|
| 32 |
+
dev = [
|
| 33 |
+
"pytest>=8.0.0",
|
| 34 |
+
"pytest-cov>=4.0.0",
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
[project.scripts]
|
| 38 |
+
# Server entry point - enables running via: uv run --project . server
|
| 39 |
+
# or: python -m he_demo.server.app
|
| 40 |
+
server = "he_demo.server.app:main"
|
| 41 |
+
|
| 42 |
+
[tool.setuptools]
|
| 43 |
+
include-package-data = true
|
| 44 |
+
packages = ["he_demo", "he_demo.server"]
|
| 45 |
+
package-dir = { "he_demo" = ".", "he_demo.server" = "server" }
|
server/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Energy & Memory RAM Optimization environment server components."""
|
| 8 |
+
|
| 9 |
+
from .he_demo_environment import EnergyOptimizationEnvironment
|
| 10 |
+
|
| 11 |
+
__all__ = ["EnergyOptimizationEnvironment"]
|
server/app.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
FastAPI application for the He Demo Environment.
|
| 9 |
+
|
| 10 |
+
This module creates an HTTP server that exposes the HeDemoEnvironment
|
| 11 |
+
over HTTP and WebSocket endpoints, compatible with EnvClient.
|
| 12 |
+
|
| 13 |
+
Endpoints:
|
| 14 |
+
- POST /reset: Reset the environment
|
| 15 |
+
- POST /step: Execute an action
|
| 16 |
+
- GET /state: Get current environment state
|
| 17 |
+
- GET /schema: Get action/observation schemas
|
| 18 |
+
- WS /ws: WebSocket endpoint for persistent sessions
|
| 19 |
+
|
| 20 |
+
Usage:
|
| 21 |
+
# Development (with auto-reload):
|
| 22 |
+
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
|
| 23 |
+
|
| 24 |
+
# Production:
|
| 25 |
+
uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 26 |
+
|
| 27 |
+
# Or run directly:
|
| 28 |
+
python -m server.app
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
try:
|
| 32 |
+
from openenv.core.env_server.http_server import create_app
|
| 33 |
+
except Exception as e: # pragma: no cover
|
| 34 |
+
raise ImportError(
|
| 35 |
+
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 36 |
+
) from e
|
| 37 |
+
|
| 38 |
+
from he_demo.models import EnergyOptimizationAction, EnergyOptimizationObservation
|
| 39 |
+
from he_demo.server.he_demo_environment import EnergyOptimizationEnvironment
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Create the app with web interface and README integration
|
| 43 |
+
app = create_app(
|
| 44 |
+
EnergyOptimizationEnvironment,
|
| 45 |
+
EnergyOptimizationAction,
|
| 46 |
+
EnergyOptimizationObservation,
|
| 47 |
+
env_name="energy_optimization",
|
| 48 |
+
max_concurrent_envs=1, # increase this number to allow more concurrent WebSocket sessions
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def main(host: str = "0.0.0.0", port: int = 8000):
    """
    Launch the FastAPI app with uvicorn for direct (non-Docker) execution.

    Supports invocations such as:
        uv run --project . server
        uv run --project . server --port 8001
        python -m he_demo.server.app

    Args:
        host: Interface to bind the server to (default: "0.0.0.0").
        port: TCP port to listen on (default: 8000).

    For production deployments, prefer running uvicorn directly with
    multiple workers:
        uvicorn he_demo.server.app:app --workers 4
    """
    # Imported lazily so merely importing this module (e.g. for the `app`
    # object) does not require uvicorn.
    import uvicorn

    uvicorn.run(app, host=host, port=port)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
if __name__ == "__main__":
|
| 75 |
+
import argparse
|
| 76 |
+
|
| 77 |
+
parser = argparse.ArgumentParser()
|
| 78 |
+
parser.add_argument("--port", type=int, default=8000)
|
| 79 |
+
args = parser.parse_args()
|
| 80 |
+
main(port=args.port)
|
server/he_demo_environment.py
ADDED
|
@@ -0,0 +1,318 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Energy & Memory RAM Optimization Environment Implementation.
|
| 9 |
+
|
| 10 |
+
An RL environment for training AI agents to optimize system resources including
|
| 11 |
+
RAM usage and energy consumption through various optimization strategies.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import random
|
| 15 |
+
from typing import List
|
| 16 |
+
from uuid import uuid4
|
| 17 |
+
|
| 18 |
+
from openenv.core.env_server.interfaces import Environment
|
| 19 |
+
from openenv.core.env_server.types import State
|
| 20 |
+
|
| 21 |
+
from he_demo.models import EnergyOptimizationAction, EnergyOptimizationObservation, Task, TaskSummary
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class EnergyOptimizationEnvironment(Environment):
|
| 25 |
+
"""
|
| 26 |
+
Energy & Memory RAM Optimization Environment.
|
| 27 |
+
|
| 28 |
+
This environment simulates a computer system where an AI agent must optimize
|
| 29 |
+
RAM usage and energy consumption. The agent faces tasks of increasing difficulty
|
| 30 |
+
and receives rewards based on optimization efficiency.
|
| 31 |
+
|
| 32 |
+
Tasks include:
|
| 33 |
+
- Basic RAM reduction
|
| 34 |
+
- Energy optimization
|
| 35 |
+
- Resource balancing
|
| 36 |
+
- Advanced multi-objective optimization
|
| 37 |
+
|
| 38 |
+
The environment includes automated graders that verify task completion and
|
| 39 |
+
provide detailed feedback on optimization performance.
|
| 40 |
+
"""
|
| 41 |
+
|
| 42 |
+
SUPPORTS_CONCURRENT_SESSIONS: bool = True
|
| 43 |
+
|
| 44 |
+
def __init__(self):
|
| 45 |
+
"""Initialize the energy optimization environment."""
|
| 46 |
+
self._state = State(episode_id=str(uuid4()), step_count=0)
|
| 47 |
+
self._reset_count = 0
|
| 48 |
+
|
| 49 |
+
# System state
|
| 50 |
+
self.ram_usage = 80.0 # Starting RAM usage %
|
| 51 |
+
self.energy_consumption = 8.0 # Starting energy consumption kWh
|
| 52 |
+
self.system_load = 0.7 # Starting system load
|
| 53 |
+
|
| 54 |
+
# Task management
|
| 55 |
+
self.tasks = self._create_tasks()
|
| 56 |
+
self.current_task_index = 0
|
| 57 |
+
self.tasks_completed = []
|
| 58 |
+
|
| 59 |
+
# Performance tracking
|
| 60 |
+
self.baseline_ram = self.ram_usage
|
| 61 |
+
self.baseline_energy = self.energy_consumption
|
| 62 |
+
|
| 63 |
+
def _create_tasks(self) -> List[Task]:
|
| 64 |
+
"""Create tasks with increasing difficulty."""
|
| 65 |
+
return [
|
| 66 |
+
Task(
|
| 67 |
+
name="basic_ram_reduction",
|
| 68 |
+
description="Reduce RAM usage below 70%",
|
| 69 |
+
difficulty=1,
|
| 70 |
+
ram_target=70.0,
|
| 71 |
+
energy_target=7.5, # Slightly below initial 8.0
|
| 72 |
+
max_steps=10
|
| 73 |
+
),
|
| 74 |
+
Task(
|
| 75 |
+
name="energy_optimization",
|
| 76 |
+
description="Reduce energy consumption below 6 kWh while maintaining RAM below 75%",
|
| 77 |
+
difficulty=2,
|
| 78 |
+
ram_target=75.0,
|
| 79 |
+
energy_target=6.0,
|
| 80 |
+
max_steps=15
|
| 81 |
+
),
|
| 82 |
+
Task(
|
| 83 |
+
name="balanced_optimization",
|
| 84 |
+
description="Balance RAM below 60% and energy below 5 kWh",
|
| 85 |
+
difficulty=3,
|
| 86 |
+
ram_target=60.0,
|
| 87 |
+
energy_target=5.0,
|
| 88 |
+
max_steps=20
|
| 89 |
+
),
|
| 90 |
+
Task(
|
| 91 |
+
name="advanced_efficiency",
|
| 92 |
+
description="Achieve RAM below 50% and energy below 4 kWh",
|
| 93 |
+
difficulty=4,
|
| 94 |
+
ram_target=50.0,
|
| 95 |
+
energy_target=4.0,
|
| 96 |
+
max_steps=25
|
| 97 |
+
),
|
| 98 |
+
Task(
|
| 99 |
+
name="expert_optimization",
|
| 100 |
+
description="Master level: RAM below 40% and energy below 3 kWh",
|
| 101 |
+
difficulty=5,
|
| 102 |
+
ram_target=40.0,
|
| 103 |
+
energy_target=3.0,
|
| 104 |
+
max_steps=30
|
| 105 |
+
)
|
| 106 |
+
]
|
| 107 |
+
|
| 108 |
+
def _get_current_task(self) -> Task:
|
| 109 |
+
"""Get the current task, cycling through available tasks."""
|
| 110 |
+
if self.current_task_index >= len(self.tasks):
|
| 111 |
+
self.current_task_index = 0
|
| 112 |
+
return self.tasks[self.current_task_index]
|
| 113 |
+
|
| 114 |
+
def _calculate_reward(self, action: EnergyOptimizationAction) -> float:
|
| 115 |
+
"""Calculate reward based on action effectiveness and task progress."""
|
| 116 |
+
base_reward = 0.0
|
| 117 |
+
|
| 118 |
+
# Action effectiveness rewards
|
| 119 |
+
if action.action_type == "reduce_ram":
|
| 120 |
+
ram_reduction = min(5.0 * action.intensity, self.ram_usage * 0.1)
|
| 121 |
+
self.ram_usage = max(0.0, self.ram_usage - ram_reduction)
|
| 122 |
+
base_reward += ram_reduction * 0.5 # Reward for RAM reduction
|
| 123 |
+
|
| 124 |
+
# Penalty for excessive RAM reduction (system instability)
|
| 125 |
+
if action.intensity > 0.8:
|
| 126 |
+
base_reward -= 2.0
|
| 127 |
+
|
| 128 |
+
elif action.action_type == "optimize_energy":
|
| 129 |
+
energy_reduction = min(1.0 * action.intensity, self.energy_consumption * 0.15)
|
| 130 |
+
self.energy_consumption = max(0.0, self.energy_consumption - energy_reduction)
|
| 131 |
+
base_reward += energy_reduction * 2.0 # Higher reward for energy savings
|
| 132 |
+
|
| 133 |
+
# Penalty for aggressive energy optimization (performance impact)
|
| 134 |
+
if action.intensity > 0.9:
|
| 135 |
+
self.system_load = min(1.0, self.system_load + 0.1)
|
| 136 |
+
base_reward -= 1.0
|
| 137 |
+
|
| 138 |
+
elif action.action_type == "balance_resources":
|
| 139 |
+
# Balanced approach: moderate improvements to both
|
| 140 |
+
ram_reduction = min(2.0 * action.intensity, self.ram_usage * 0.05)
|
| 141 |
+
energy_reduction = min(0.5 * action.intensity, self.energy_consumption * 0.1)
|
| 142 |
+
|
| 143 |
+
self.ram_usage = max(0.0, self.ram_usage - ram_reduction)
|
| 144 |
+
self.energy_consumption = max(0.0, self.energy_consumption - energy_reduction)
|
| 145 |
+
|
| 146 |
+
base_reward += (ram_reduction * 0.3 + energy_reduction * 1.5)
|
| 147 |
+
|
| 148 |
+
elif action.action_type == "monitor_system":
|
| 149 |
+
# Monitoring action: small reward for gathering information
|
| 150 |
+
base_reward += 0.1
|
| 151 |
+
# Slight natural system load reduction from monitoring
|
| 152 |
+
self.system_load = max(0.0, self.system_load - 0.02)
|
| 153 |
+
|
| 154 |
+
# Natural system changes (simulate real system behavior)
|
| 155 |
+
self._apply_system_dynamics()
|
| 156 |
+
|
| 157 |
+
# Task completion bonus
|
| 158 |
+
current_task = self._get_current_task()
|
| 159 |
+
if not current_task.completed and current_task.check_completion(
|
| 160 |
+
self.ram_usage, self.energy_consumption, self._state.step_count
|
| 161 |
+
):
|
| 162 |
+
current_task.completed = True
|
| 163 |
+
self.tasks_completed.append(current_task.name)
|
| 164 |
+
base_reward += current_task.difficulty * 10.0 # Bonus for task completion
|
| 165 |
+
self.current_task_index += 1 # Move to next task
|
| 166 |
+
|
| 167 |
+
# Efficiency bonus
|
| 168 |
+
efficiency_improvement = (
|
| 169 |
+
(self.baseline_ram - self.ram_usage) / self.baseline_ram +
|
| 170 |
+
(self.baseline_energy - self.energy_consumption) / self.baseline_energy
|
| 171 |
+
) * 0.5
|
| 172 |
+
base_reward += efficiency_improvement
|
| 173 |
+
|
| 174 |
+
return base_reward
|
| 175 |
+
|
| 176 |
+
def _apply_system_dynamics(self):
|
| 177 |
+
"""Apply natural system dynamics and external factors."""
|
| 178 |
+
# Random external load changes
|
| 179 |
+
if random.random() < 0.1: # 10% chance each step
|
| 180 |
+
load_change = random.uniform(-0.05, 0.05)
|
| 181 |
+
self.system_load = max(0.0, min(1.0, self.system_load + load_change))
|
| 182 |
+
|
| 183 |
+
# Load affects RAM and energy
|
| 184 |
+
ram_impact = load_change * 10.0
|
| 185 |
+
energy_impact = load_change * 0.5
|
| 186 |
+
|
| 187 |
+
self.ram_usage = max(0.0, min(100.0, self.ram_usage + ram_impact))
|
| 188 |
+
self.energy_consumption = max(0.0, self.energy_consumption + energy_impact)
|
| 189 |
+
|
| 190 |
+
def _calculate_task_progress(self) -> float:
|
| 191 |
+
"""Calculate progress towards current task completion."""
|
| 192 |
+
current_task = self._get_current_task()
|
| 193 |
+
if current_task.completed:
|
| 194 |
+
return 1.0
|
| 195 |
+
|
| 196 |
+
# Calculate RAM progress (0-1 scale)
|
| 197 |
+
ram_progress = max(0.0, min(1.0, (100.0 - self.ram_usage) / (100.0 - current_task.ram_target)))
|
| 198 |
+
|
| 199 |
+
# Calculate energy progress (0-1 scale)
|
| 200 |
+
energy_range = 10.0 - current_task.energy_target # Total possible energy reduction
|
| 201 |
+
if energy_range > 0:
|
| 202 |
+
energy_progress = max(0.0, min(1.0, (8.0 - self.energy_consumption) / energy_range))
|
| 203 |
+
else:
|
| 204 |
+
energy_progress = 1.0 if self.energy_consumption <= current_task.energy_target else 0.0
|
| 205 |
+
|
| 206 |
+
return min(1.0, (ram_progress + energy_progress) / 2.0)
|
| 207 |
+
|
| 208 |
+
def _calculate_efficiency_score(self) -> float:
|
| 209 |
+
"""Calculate overall efficiency score."""
|
| 210 |
+
ram_efficiency = max(0.0, (100.0 - self.ram_usage) / 100.0)
|
| 211 |
+
energy_efficiency = max(0.0, (10.0 - self.energy_consumption) / 10.0)
|
| 212 |
+
return (ram_efficiency + energy_efficiency) / 2.0
|
| 213 |
+
|
| 214 |
+
def _task_to_summary(self, task: Task, steps_taken: int) -> TaskSummary:
    """Build the observation-facing TaskSummary for *task*.

    For a completed task the remaining step count is 0 and progress is
    pinned at 1.0; otherwise remaining steps count down from the task's
    step budget and progress is recomputed from the current system state.
    """
    if task.completed:
        remaining_steps, progress = 0, 1.0
    else:
        remaining_steps = max(0, task.max_steps - steps_taken)
        progress = self._calculate_task_progress()

    return TaskSummary(
        name=task.name,
        description=task.description,
        difficulty=task.difficulty,
        ram_target=task.ram_target,
        energy_target=task.energy_target,
        max_steps=task.max_steps,
        completed=task.completed,
        remaining_steps=remaining_steps,
        progress=progress,
    )
|
| 230 |
+
|
| 231 |
+
def reset(self) -> EnergyOptimizationObservation:
    """Restore the environment to its initial state and start a new episode.

    Returns:
        EnergyOptimizationObservation describing the freshly reset system
        (RAM at 80%, energy at 8.0 kWh, load at 0.7, no tasks completed,
        zero reward).
    """
    self._state = State(episode_id=str(uuid4()), step_count=0)
    self._reset_count += 1

    # Initial system metrics for every episode.
    self.ram_usage = 80.0
    self.energy_consumption = 8.0
    self.system_load = 0.7

    # All tasks become pending again; bookkeeping restarts at the first one.
    for pending in self.tasks:
        pending.completed = False
    self.current_task_index = 0
    self.tasks_completed = []

    # Baselines record the starting point so improvement can be measured.
    self.baseline_ram = self.ram_usage
    self.baseline_energy = self.energy_consumption

    first_task = self._get_current_task()

    return EnergyOptimizationObservation(
        ram_usage=self.ram_usage,
        energy_consumption=self.energy_consumption,
        system_load=self.system_load,
        current_task=self._task_to_summary(first_task, 0) if first_task else None,
        tasks_completed=list(self.tasks_completed),
        steps_taken=0,
        task_progress=self._calculate_task_progress(),
        efficiency_score=self._calculate_efficiency_score(),
        done=False,
        reward=0.0,
    )
|
| 270 |
+
|
| 271 |
+
def step(self, action: EnergyOptimizationAction) -> EnergyOptimizationObservation:
    """Apply one optimization action and advance the simulation.

    Args:
        action: The optimization strategy (type + intensity) to execute.

    Returns:
        EnergyOptimizationObservation with the post-action system state,
        the reward earned this step, and metadata describing the action.
    """
    self._state.step_count += 1

    # Reward for this action under the current system state.
    step_reward = self._calculate_reward(action)

    # The episode ends after 100 steps or once every task is finished.
    episode_over = (
        self._state.step_count >= 100
        or self.current_task_index >= len(self.tasks)
    )

    active_task = self._get_current_task()
    task_summary = (
        self._task_to_summary(active_task, self._state.step_count)
        if active_task
        else None
    )

    return EnergyOptimizationObservation(
        ram_usage=self.ram_usage,
        energy_consumption=self.energy_consumption,
        system_load=self.system_load,
        current_task=task_summary,
        tasks_completed=list(self.tasks_completed),
        steps_taken=self._state.step_count,
        task_progress=self._calculate_task_progress(),
        efficiency_score=self._calculate_efficiency_score(),
        done=episode_over,
        reward=step_reward,
        metadata={
            "action_taken": action.action_type,
            "action_intensity": action.intensity,
            "episode_step": self._state.step_count,
            "current_task_name": active_task.name if active_task else None,
        },
    )
|
| 309 |
+
|
| 310 |
+
@property
def state(self) -> State:
    """
    Get the current environment state.

    Returns:
        Current State with episode_id and step_count for the running
        episode (replaced on every reset()).
    """
    return self._state
|
server/requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv[core]>=0.2.0
|
| 2 |
+
fastapi>=0.115.0
|
| 3 |
+
uvicorn>=0.24.0
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
test_environment.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
Test script for the Energy & Memory RAM Optimization Environment.

Runs a stand-alone smoke test against the server environment class:
reset, one step per action type, then a short scripted episode that
alternates RAM reduction and energy optimization.
"""

import sys
import os

# Add the project root to Python path
project_root = os.path.dirname(__file__)
sys.path.insert(0, project_root)

# Mock the he_demo package for testing
# (the server environment imports its models via `he_demo`; synthesize
# that package from the local models.py so this script runs stand-alone)
import types
he_demo = types.ModuleType('he_demo')

# Import models and add to he_demo
from models import EnergyOptimizationAction, EnergyOptimizationObservation, Task, TaskSummary
he_demo.EnergyOptimizationAction = EnergyOptimizationAction
he_demo.EnergyOptimizationObservation = EnergyOptimizationObservation
he_demo.Task = Task
he_demo.TaskSummary = TaskSummary

# Add to sys.modules
# NOTE(review): 'he_demo.models' is mapped to the same module object as
# 'he_demo'; this works here because both expose the same four classes.
sys.modules['he_demo'] = he_demo
sys.modules['he_demo.models'] = he_demo

# Now import the environment
from server.he_demo_environment import EnergyOptimizationEnvironment

def test_environment():
    """Smoke-test the energy optimization environment end to end (prints results)."""
    print("Testing Energy & Memory RAM Optimization Environment")
    print("=" * 60)

    # Create environment
    env = EnergyOptimizationEnvironment()

    # Test reset
    print("\n1. Testing reset...")
    obs = env.reset()
    print(f"Initial RAM usage: {obs.ram_usage:.1f}%")
    print(f"Initial energy consumption: {obs.energy_consumption:.1f} kWh")
    print(f"Initial system load: {obs.system_load:.2f}")
    print(f"Current task: {obs.current_task.name if obs.current_task else 'None'}")
    print(f"Tasks completed: {obs.tasks_completed}")

    # Test different actions: one (action_type, intensity) pair per strategy
    actions_to_test = [
        ("reduce_ram", 0.8),
        ("optimize_energy", 0.7),
        ("balance_resources", 0.6),
        ("monitor_system", 0.5)
    ]

    print("\n2. Testing actions...")
    for action_type, intensity in actions_to_test:
        action = EnergyOptimizationAction(action_type=action_type, intensity=intensity)
        obs = env.step(action)

        print(f"\nAction: {action_type} (intensity: {intensity})")
        print(f"RAM usage: {obs.ram_usage:.1f}%")
        print(f"Energy consumption: {obs.energy_consumption:.1f} kWh")
        print(f"System load: {obs.system_load:.2f}")
        print(f"Reward: {obs.reward:.2f}")
        print(f"Task progress: {obs.task_progress:.2f}")
        print(f"Efficiency score: {obs.efficiency_score:.2f}")
        print(f"Current task: {obs.current_task.name if obs.current_task else 'None'}")
        print(f"Tasks completed: {obs.tasks_completed}")

        if obs.done:
            print("Episode completed!")
            break

    print("\n3. Testing task progression...")
    # Reset and try to complete a task
    obs = env.reset()
    steps = 0
    max_test_steps = 20

    while not obs.done and steps < max_test_steps:
        # Simple strategy: alternate between RAM reduction and energy optimization
        if steps % 2 == 0:
            action = EnergyOptimizationAction(action_type="reduce_ram", intensity=0.9)
        else:
            action = EnergyOptimizationAction(action_type="optimize_energy", intensity=0.8)

        obs = env.step(action)
        steps += 1

        print(f"Step {steps}: RAM={obs.ram_usage:.1f}%, Energy={obs.energy_consumption:.1f}kWh, Reward={obs.reward:.2f}")

        if obs.current_task and obs.task_progress >= 1.0:
            print(f"Task '{obs.current_task.name}' completed!")
            break

    print("\nTest completed successfully!")
    print(f"Final state: RAM={obs.ram_usage:.1f}%, Energy={obs.energy_consumption:.1f}kWh")
    print(f"Tasks completed: {len(obs.tasks_completed)}")
    print(f"Total steps: {steps}")

if __name__ == "__main__":
    test_environment()
|
train_agent.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
Train an RL agent on the Energy Optimization Environment.

Trains a PPO policy (stable-baselines3) on the Gym wrapper around the
energy/RAM optimization environment, saves the model to disk, then replays
one greedy episode and prints each step.
"""

import sys
import os
sys.path.insert(0, os.path.dirname(__file__))

# Mock the he_demo package for direct testing
# (the environment imports its models via `he_demo`; synthesize that
# package from the local models.py so this script runs stand-alone)
import types
he_demo = types.ModuleType('he_demo')
from models import EnergyOptimizationAction, EnergyOptimizationObservation, Task, TaskSummary
he_demo.EnergyOptimizationAction = EnergyOptimizationAction
he_demo.EnergyOptimizationObservation = EnergyOptimizationObservation
he_demo.Task = Task
he_demo.TaskSummary = TaskSummary
sys.modules['he_demo'] = he_demo
sys.modules['he_demo.models'] = he_demo

from gym_wrapper import EnergyOptimizationGymEnv
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

def train_agent():
    """Train a PPO agent on the energy optimization environment.

    Side effects: writes the trained model to 'energy_optimization_ppo.zip'
    in the current working directory and prints training/evaluation logs.
    """

    print("🚀 Training PPO Agent on Energy Optimization Environment")
    print("=" * 60)

    # Create vectorized environment for better training
    def make_env():
        return EnergyOptimizationGymEnv()

    env = make_vec_env(make_env, n_envs=4)

    # Create PPO agent (standard PPO defaults spelled out explicitly)
    model = PPO(
        "MlpPolicy",
        env,
        verbose=1,
        learning_rate=3e-4,
        n_steps=2048,
        batch_size=64,
        n_epochs=10,
        gamma=0.99,
        gae_lambda=0.95,
        clip_range=0.2,
        ent_coef=0.0,
        vf_coef=0.5,
        max_grad_norm=0.5,
    )

    # Train the agent
    print("Training for 10,000 timesteps...")
    model.learn(total_timesteps=10000)

    # Save the trained model
    model.save("energy_optimization_ppo")
    print("✅ Model saved as 'energy_optimization_ppo.zip'")

    # Test the trained agent
    print("\n🧪 Testing trained agent...")
    test_env = EnergyOptimizationGymEnv()
    obs, _ = test_env.reset()

    total_reward = 0
    steps = 0

    while steps < 50:
        # Get action from trained model
        action, _ = model.predict(obs, deterministic=True)

        # Execute action
        # NOTE(review): the 4th return value (truncated) is discarded, so a
        # time-limit truncation would not end this loop — confirm intended.
        obs, reward, done, _, _ = test_env.step(action)

        total_reward += reward
        steps += 1

        # Convert action back to readable format
        # NOTE(review): assumes action is a 2-element array of
        # [action_type_index, intensity] — verify against the action space
        # defined in gym_wrapper.py.
        action_type_index = int(action[0])
        intensity = float(action[1])
        action_types = ["reduce_ram", "optimize_energy", "balance_resources", "monitor_system"]
        action_type = action_types[action_type_index]

        print(f"Step {steps}: {action_type}({intensity:.1f}) -> RAM={obs[0]:.1f}%, Energy={obs[1]:.1f}kWh, Reward={reward:.2f}")

        if done:
            break

if __name__ == "__main__":
    train_agent()
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
validate.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
Final validation script for the Energy & Memory RAM Optimization Environment.

Exercises environment construction, reset, and one step per main action
type, exiting nonzero if anything raises.
"""

import sys
import os

# Add the project root to Python path
project_root = os.path.dirname(__file__)
sys.path.insert(0, project_root)

# Mock the he_demo package
# (the server environment imports its models via `he_demo`; synthesize
# that package from the local models.py so this script runs stand-alone)
import types
he_demo = types.ModuleType('he_demo')

# Import models and add to he_demo
from models import EnergyOptimizationAction, EnergyOptimizationObservation, Task, TaskSummary
he_demo.EnergyOptimizationAction = EnergyOptimizationAction
he_demo.EnergyOptimizationObservation = EnergyOptimizationObservation
he_demo.Task = Task
he_demo.TaskSummary = TaskSummary

# Add to sys.modules
sys.modules['he_demo'] = he_demo
sys.modules['he_demo.models'] = he_demo

# Now import the environment
from server.he_demo_environment import EnergyOptimizationEnvironment
|
| 30 |
+
|
| 31 |
+
def main():
    """Run a quick end-to-end validation of the environment and report results.

    Exits with status 1 if any stage (construction, reset, or stepping)
    raises an exception.
    """
    print("🔋 Energy & Memory RAM Optimization Environment - Final Validation")
    print("=" * 70)

    try:
        # Construction and reset must succeed before any action is exercised.
        environment = EnergyOptimizationEnvironment()
        print("✅ Environment created successfully")

        observation = environment.reset()
        print("✅ Environment reset successfully")
        print(f"   Initial RAM: {observation.ram_usage:.1f}%")
        print(f"   Initial Energy: {observation.energy_consumption:.1f} kWh")
        print(f"   Current Task: {observation.current_task.name if observation.current_task else 'None'}")

        # Exercise one action per main optimization strategy.
        for kind, strength in (
            ("reduce_ram", 0.8),
            ("optimize_energy", 0.7),
            ("balance_resources", 0.6),
        ):
            observation = environment.step(
                EnergyOptimizationAction(action_type=kind, intensity=strength)
            )
            print(f"✅ Action '{kind}' executed: RAM={observation.ram_usage:.1f}%, Energy={observation.energy_consumption:.1f}kWh, Reward={observation.reward:.2f}")

        print("\n🎉 All validation tests passed!")
        print("🚀 The Energy & Memory RAM Optimization Environment is ready for deployment!")

    except Exception as e:
        # Any failure marks the validation as failed with a nonzero exit code.
        print(f"❌ Validation failed: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()
|