burtenshaw HF Staff commited on
Commit
4729fab
·
verified ·
1 Parent(s): 8dc7c86

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Multi-stage build using openenv-base.
# This Dockerfile is flexible and works for both:
#   - In-repo environments (with local OpenEnv sources)
#   - Standalone environments (with openenv from PyPI/Git)
# The build script (openenv build) handles context detection and sets appropriate build args.

ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
FROM ${BASE_IMAGE} AS builder

WORKDIR /app

# git is required so uv can install dependencies pinned to VCS URLs.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# Whether we are building standalone or in-repo (set by `openenv build`).
ARG BUILD_MODE=in-repo
ARG ENV_NAME=benchmark

# Environment code always sits at the root of the build context.
COPY . /app/env

# For in-repo builds, openenv is already vendored in the build context;
# standalone builds pull it in via pyproject.toml during `uv sync`.
WORKDIR /app/env

# Bootstrap uv for local builds whose base image does not ship it.
RUN if ! command -v uv >/dev/null 2>&1; then \
        curl -LsSf https://astral.sh/uv/install.sh | sh && \
        mv /root/.local/bin/uv /usr/local/bin/uv && \
        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
    fi

# Install third-party dependencies first (better layer caching), honoring
# uv.lock when present.
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ -f uv.lock ]; then \
        uv sync --frozen --no-install-project --no-editable; \
    else \
        uv sync --no-install-project --no-editable; \
    fi

# Then install the project itself into the venv.
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ -f uv.lock ]; then \
        uv sync --frozen --no-editable; \
    else \
        uv sync --no-editable; \
    fi

# Final runtime stage
FROM ${BASE_IMAGE}

WORKDIR /app

# Copy the virtual environment and environment code from the builder.
COPY --from=builder /app/env/.venv /app/.venv
COPY --from=builder /app/env /app/env

# Put the virtual environment first on PATH.
ENV PATH="/app/.venv/bin:$PATH"

# Make /app/env importable. The ${PYTHONPATH:+:$PYTHONPATH} expansion avoids
# a trailing ':' when PYTHONPATH is unset in the base image — a trailing
# colon would implicitly add the working directory to sys.path.
ENV PYTHONPATH="/app/env${PYTHONPATH:+:$PYTHONPATH}"

# Health check (assumes curl is available in the runtime image — TODO confirm
# against the openenv-base image contents).
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Run the FastAPI server from /app/env so `server.app` resolves.
ENV ENABLE_WEB_INTERFACE=true
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
README.md CHANGED
@@ -1,10 +1,250 @@
1
  ---
2
- title: Openenv Benchmark
3
- emoji: 🌍
4
- colorFrom: blue
5
  colorTo: blue
6
  sdk: docker
7
  pinned: false
 
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Benchmark Environment Server
3
+ emoji: 🕹️
4
+ colorFrom: purple
5
  colorTo: blue
6
  sdk: docker
7
  pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
+ tags:
11
+ - openenv
12
  ---
13
 
14
+ # Benchmark Environment
15
+
16
+ A test environment for benchmarking infrastructure and concurrency. Actions specify how many seconds to wait (sleep), making it ideal for testing parallel execution and server scaling. Returns server identity information to verify which instance handled each request.
17
+
18
+ ## Quick Start
19
+
20
+ The simplest way to use the Benchmark environment is through the `BenchmarkEnv` class:
21
+
22
+ ```python
23
+ from benchmark import BenchmarkAction, BenchmarkEnv
24
+
25
+ try:
26
+ # Create environment from Docker image
27
+ benchmarkenv = BenchmarkEnv.from_docker_image("benchmark-env:latest")
28
+
29
+ # Reset - get server identity
30
+ result = benchmarkenv.reset()
31
+ print(f"Host URL: {result.observation.host_url}")
32
+ print(f"PID: {result.observation.pid}")
33
+ print(f"Session Hash: {result.observation.session_hash}")
34
+
35
+ # Test concurrency with different wait times
36
+ wait_times = [0.5, 1.0, 2.0]
37
+
38
+ for seconds in wait_times:
39
+ result = benchmarkenv.step(BenchmarkAction(wait_seconds=seconds))
40
+ print(f"Waited: {result.observation.waited_seconds}s")
41
+ print(f" → Timestamp: {result.observation.timestamp}")
42
+ print(f" → Reward: {result.reward}")
43
+ print(f" → Server PID: {result.observation.pid}")
44
+
45
+ finally:
46
+ # Always clean up
47
+ benchmarkenv.close()
48
+ ```
49
+
50
+ That's it! The `BenchmarkEnv.from_docker_image()` method handles:
51
+ - Starting the Docker container
52
+ - Waiting for the server to be ready
53
+ - Connecting to the environment
54
+ - Container cleanup when you call `close()`
55
+
56
+ ## Testing Concurrency
57
+
58
+ The benchmark environment is designed to test concurrent execution:
59
+
60
+ ```python
61
+ import asyncio
62
+ from benchmark import BenchmarkAction, BenchmarkEnv
63
+
64
+ async def parallel_requests():
65
+ # Connect to multiple servers or same server
66
+ clients = [
67
+ BenchmarkEnv(base_url="http://localhost:8000"),
68
+ BenchmarkEnv(base_url="http://localhost:8001"),
69
+ BenchmarkEnv(base_url="http://localhost:8002"),
70
+ ]
71
+
72
+ # Reset all clients
73
+ for client in clients:
74
+ result = client.reset()
75
+ print(f"Server {result.observation.session_hash}: PID {result.observation.pid}")
76
+
77
+ # Send concurrent requests with different wait times
78
+ import concurrent.futures
79
+ with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
80
+ futures = []
81
+ for i, client in enumerate(clients):
82
+ future = executor.submit(
83
+ client.step,
84
+ BenchmarkAction(wait_seconds=i + 1)
85
+ )
86
+ futures.append((client, future))
87
+
88
+ for client, future in futures:
89
+ result = future.result()
90
+ print(f"Server {result.observation.session_hash} waited {result.observation.waited_seconds}s")
91
+
92
+ # Clean up
93
+ for client in clients:
94
+ client.close()
95
+ ```
96
+
97
+ ## Building the Docker Image
98
+
99
+ Before using the environment, you need to build the Docker image:
100
+
101
+ ```bash
102
+ # From project root
103
+ docker build -t benchmark-env:latest -f server/Dockerfile .
104
+ ```
105
+
106
+ ## Deploying to Hugging Face Spaces
107
+
108
+ You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
109
+
110
+ ```bash
111
+ # From the environment directory (where openenv.yaml is located)
112
+ openenv push
113
+
114
+ # Or specify options
115
+ openenv push --namespace my-org --private
116
+ ```
117
+
118
+ The `openenv push` command will:
119
+ 1. Validate that the directory is an OpenEnv environment (checks for `openenv.yaml`)
120
+ 2. Prepare a custom build for Hugging Face Docker space (enables web interface)
121
+ 3. Upload to Hugging Face (ensuring you're logged in)
122
+
123
+ ### Prerequisites
124
+
125
+ - Authenticate with Hugging Face: The command will prompt for login if not already authenticated
126
+
127
+ ### Options
128
+
129
+ - `--directory`, `-d`: Directory containing the OpenEnv environment (defaults to current directory)
130
+ - `--repo-id`, `-r`: Repository ID in format 'username/repo-name' (defaults to 'username/env-name' from openenv.yaml)
131
+ - `--base-image`, `-b`: Base Docker image to use (overrides Dockerfile FROM)
132
+ - `--private`: Deploy the space as private (default: public)
133
+
134
+ ### Examples
135
+
136
+ ```bash
137
+ # Push to your personal namespace (defaults to username/env-name from openenv.yaml)
138
+ openenv push
139
+
140
+ # Push to a specific repository
141
+ openenv push --repo-id my-org/my-env
142
+
143
+ # Push with a custom base image
144
+ openenv push --base-image ghcr.io/meta-pytorch/openenv-base:latest
145
+
146
+ # Push as a private space
147
+ openenv push --private
148
+
149
+ # Combine options
150
+ openenv push --repo-id my-org/my-env --base-image custom-base:latest --private
151
+ ```
152
+
153
+ After deployment, your space will be available at:
154
+ `https://huggingface.co/spaces/<repo-id>`
155
+
156
+ The deployed space includes:
157
+ - **Web Interface** at `/web` - Interactive UI for exploring the environment
158
+ - **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
159
+ - **Health Check** at `/health` - Container health monitoring
160
+
161
+ ## Environment Details
162
+
163
+ ### Action
164
+ **BenchmarkAction**: Contains a single field
165
+ - `wait_seconds` (float) - Seconds to wait/sleep before returning (default: 0.0)
166
+
167
+ ### Observation
168
+ **BenchmarkObservation**: Contains server identity and timing information
169
+ - `host_url` (str) - The URL of the server that handled the request
170
+ - `pid` (int) - Process ID of the server
171
+ - `session_hash` (str) - Unique 16-character hash identifying this server session
172
+ - `waited_seconds` (float) - Actual seconds waited
173
+ - `timestamp` (float) - Unix timestamp when observation was created
174
+ - `reward` (float) - Reward based on wait time
175
+ - `done` (bool) - Always False for benchmark environment
176
+ - `metadata` (dict) - Additional info
177
+
178
+ ### Reward
179
+ The reward is calculated as: `1.0 / (1.0 + wait_seconds)`
180
+ - 0 seconds → reward: 1.0
181
+ - 1 second → reward: 0.5
182
+ - 2 seconds → reward: 0.33
183
+ - Encourages faster responses
184
+
185
+ ## Advanced Usage
186
+
187
+ ### Connecting to an Existing Server
188
+
189
+ If you already have a Benchmark environment server running, you can connect directly:
190
+
191
+ ```python
192
+ from benchmark import BenchmarkEnv, BenchmarkAction
193
+
194
+ # Connect to existing server
195
+ benchmarkenv = BenchmarkEnv(base_url="<ENV_HTTP_URL_HERE>")
196
+
197
+ # Use as normal
198
+ result = benchmarkenv.reset()
199
+ print(f"Connected to server: {result.observation.host_url}")
200
+ print(f"Session: {result.observation.session_hash}")
201
+
202
+ result = benchmarkenv.step(BenchmarkAction(wait_seconds=1.5))
203
+ print(f"Waited {result.observation.waited_seconds}s")
204
+ ```
205
+
206
+ Note: When connecting to an existing server, `benchmarkenv.close()` will NOT stop the server.
207
+
208
+ ## Development & Testing
209
+
210
+ ### Direct Environment Testing
211
+
212
+ Test the environment logic directly without starting the HTTP server:
213
+
214
+ ```bash
215
+ # From the server directory
216
+ python3 server/benchmark_environment.py
217
+ ```
218
+
219
+ This verifies that:
220
+ - Environment resets correctly
221
+ - Step executes actions properly
222
+ - State tracking works
223
+ - Server identity is returned correctly
224
+
225
+ ### Running Locally
226
+
227
+ Run the server locally for development:
228
+
229
+ ```bash
230
+ uvicorn server.app:app --reload
231
+ ```
232
+
233
+ ## Project Structure
234
+
235
+ ```
236
+ benchmark/
237
+ ├── .dockerignore # Docker build exclusions
238
+ ├── __init__.py # Module exports
239
+ ├── README.md # This file
240
+ ├── openenv.yaml # OpenEnv manifest
241
+ ├── pyproject.toml # Project metadata and dependencies
242
+ ├── uv.lock # Locked dependencies (generated)
243
+ ├── client.py # BenchmarkEnv client implementation
244
+ ├── models.py # Action and Observation models
245
+ └── server/
246
+ ├── __init__.py # Server module exports
247
+ ├── benchmark_environment.py # Core environment logic
248
+ ├── app.py # FastAPI application
249
+ └── Dockerfile # Container image definition
250
+ ```
__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Benchmark Environment - Test environment for infrastructure and concurrency benchmarking."""
8
+
9
+ from .client import BenchmarkEnv
10
+ from .models import BenchmarkAction, BenchmarkObservation
11
+
12
+ __all__ = ["BenchmarkAction", "BenchmarkObservation", "BenchmarkEnv"]
13
+
client.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Benchmark Environment HTTP Client.
9
+
10
+ This module provides the client for connecting to a Benchmark Environment server
11
+ over HTTP. Useful for testing concurrency and infrastructure.
12
+ """
13
+
14
+ from typing import Dict
15
+
16
+ from openenv.core.client_types import StepResult
17
+ from openenv.core.env_server.types import State
18
+ from openenv.core.http_env_client import HTTPEnvClient
19
+
20
+ from .models import BenchmarkAction, BenchmarkObservation
21
+
22
+
23
class BenchmarkEnv(HTTPEnvClient[BenchmarkAction, BenchmarkObservation]):
    """HTTP client for the Benchmark Environment.

    Connects to a running BenchmarkEnvironment HTTP server and exposes
    reset(), step(), and state access over the wire.

    Example:
        >>> client = BenchmarkEnv(base_url="http://localhost:8000")
        >>> result = client.reset()
        >>> print(result.observation.host_url)
        >>> print(result.observation.pid)
        >>> print(result.observation.session_hash)
        >>>
        >>> # Test concurrency by waiting
        >>> result = client.step(BenchmarkAction(wait_seconds=2.0))
        >>> print(result.observation.waited_seconds)

    Example with Docker:
        >>> # Automatically start container and connect
        >>> client = BenchmarkEnv.from_docker_image("benchmark-env:latest")
        >>> result = client.reset()
        >>> result = client.step(BenchmarkAction(wait_seconds=1.0))
    """

    def _step_payload(self, action: BenchmarkAction) -> Dict:
        """Serialize a BenchmarkAction into the JSON body for a step request."""
        return {"wait_seconds": action.wait_seconds}

    def _parse_result(self, payload: Dict) -> StepResult[BenchmarkObservation]:
        """Decode a server response into StepResult[BenchmarkObservation].

        Args:
            payload: JSON response from the server

        Returns:
            StepResult carrying a BenchmarkObservation
        """
        obs_payload = payload.get("observation", {})
        done = payload.get("done", False)
        reward = payload.get("reward")
        observation = BenchmarkObservation(
            host_url=obs_payload.get("host_url", ""),
            pid=obs_payload.get("pid", 0),
            session_hash=obs_payload.get("session_hash", ""),
            waited_seconds=obs_payload.get("waited_seconds", 0.0),
            timestamp=obs_payload.get("timestamp", 0.0),
            done=done,
            reward=reward,
            metadata=obs_payload.get("metadata", {}),
        )
        return StepResult(observation=observation, reward=reward, done=done)

    def _parse_state(self, payload: Dict) -> State:
        """Decode a /state response into a State object.

        Args:
            payload: JSON response from the /state endpoint

        Returns:
            State with episode_id and step_count
        """
        return State(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
        )
models.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Data models for the Benchmark Environment.
9
+
10
+ The benchmark environment is designed for testing concurrency and infrastructure.
11
+ Actions specify a wait time in seconds, allowing testing of parallel execution.
12
+ """
13
+
14
+ from pydantic import Field
15
+
16
+ from openenv.core.env_server.types import Action, Observation
17
+
18
+
19
class BenchmarkAction(Action):
    """Benchmark action: how many seconds the server should sleep."""

    # Non-negative sleep duration; 0.0 means respond immediately.
    wait_seconds: float = Field(default=0.0, ge=0.0, description="Seconds to wait/sleep")
23
+
24
+
25
class BenchmarkObservation(Observation):
    """Benchmark observation: server identity plus timing information."""

    # Identity of the server instance that handled the request.
    host_url: str = Field(default="", description="URL of the server that handled the request")
    pid: int = Field(default=0, description="Process ID of the server")
    session_hash: str = Field(default="", description="Unique hash identifying this server session")

    # Timing of the step that produced this observation.
    waited_seconds: float = Field(default=0.0, description="Actual seconds waited")
    timestamp: float = Field(default=0.0, description="Unix timestamp when observation was created")
openenv.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: benchmark
3
+ type: space
4
+ runtime: fastapi
5
+ app: server.app:app
6
+ port: 8000
7
+
openenv_benchmark.egg-info/PKG-INFO ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: openenv-benchmark
3
+ Version: 0.1.0
4
+ Summary: Benchmark environment for OpenEnv
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: openenv[core]>=0.2.0
7
+ Provides-Extra: dev
8
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
9
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
openenv_benchmark.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ README.md
2
+ pyproject.toml
3
+ ./__init__.py
4
+ ./client.py
5
+ ./models.py
6
+ openenv_benchmark.egg-info/PKG-INFO
7
+ openenv_benchmark.egg-info/SOURCES.txt
8
+ openenv_benchmark.egg-info/dependency_links.txt
9
+ openenv_benchmark.egg-info/entry_points.txt
10
+ openenv_benchmark.egg-info/requires.txt
11
+ openenv_benchmark.egg-info/top_level.txt
12
+ server/__init__.py
13
+ server/app.py
14
+ server/benchmark_environment.py
openenv_benchmark.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
openenv_benchmark.egg-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [console_scripts]
2
+ server = benchmark.server.app:main
openenv_benchmark.egg-info/requires.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openenv[core]>=0.2.0
2
+
3
+ [dev]
4
+ pytest>=8.0.0
5
+ pytest-cov>=4.0.0
openenv_benchmark.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ benchmark
pyproject.toml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ [build-system]
8
+ requires = ["setuptools>=45", "wheel"]
9
+ build-backend = "setuptools.build_meta"
10
+
11
+ [project]
12
+ name = "openenv-benchmark"
13
+ version = "0.1.0"
14
+ description = "Benchmark environment for OpenEnv"
15
+ requires-python = ">=3.10"
16
+ dependencies = [
17
+ # Core OpenEnv runtime (provides FastAPI server + HTTP client types)
18
+ "openenv[core]>=0.2.0",
19
+ # Environment-specific dependencies
20
+ # Add all dependencies needed for your environment here
21
+ # Examples:
22
+ # "numpy>=1.19.0",
23
+ # "torch>=2.0.0",
24
+ # "gymnasium>=0.29.0",
25
+ # "openspiel>=1.0.0",
26
+ # "smolagents>=1.22.0,<2",
27
+ ]
28
+
29
+ [project.optional-dependencies]
30
+ dev = [
31
+ "pytest>=8.0.0",
32
+ "pytest-cov>=4.0.0",
33
+ ]
34
+
35
+ [project.scripts]
36
+ # Server entry point - enables running via: uv run --project . server
37
+ # or: python -m benchmark.server.app
38
+ server = "benchmark.server.app:main"
39
+
40
+ [tool.setuptools]
41
+ include-package-data = true
42
+ packages = ["benchmark", "benchmark.server"]
43
+ package-dir = { "benchmark" = ".", "benchmark.server" = "server" }
server/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Benchmark environment server components."""
8
+
9
+ from .benchmark_environment import BenchmarkEnvironment
10
+
11
+ __all__ = ["BenchmarkEnvironment"]
12
+
server/app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ FastAPI application for the Benchmark Environment.
9
+
10
+ This module creates an HTTP server that exposes the BenchmarkEnvironment
11
+ over HTTP endpoints, making it compatible with HTTPEnvClient.
12
+
13
+ Usage:
14
+ # Development (with auto-reload):
15
+ uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
16
+
17
+ # Production:
18
+ uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
19
+
20
+ # Or run directly:
21
+ python -m server.app
22
+ """
23
+
24
try:
    from openenv.core.env_server.http_server import create_app
except Exception as e:  # pragma: no cover
    raise ImportError(
        "openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
    ) from e

from benchmark.models import BenchmarkAction, BenchmarkObservation
from .benchmark_environment import BenchmarkEnvironment

# Single shared environment instance served by this process.
env = BenchmarkEnvironment()

# FastAPI app exposing the environment (reset/step/state plus the web
# interface and README integration).
app = create_app(
    env,
    BenchmarkAction,
    BenchmarkObservation,
    env_name="benchmark",
)
44
+
45
+
46
def main(host: str = "0.0.0.0", port: int = 8000):
    """Serve the app with uvicorn (entry point for uv run / python -m).

    Enables running the server without Docker:
        uv run --project . server
        uv run --project . server --port 8001
        python -m benchmark.server.app

    Args:
        host: Host address to bind to (default: "0.0.0.0")
        port: Port number to listen on (default: 8000)

    For production deployments, prefer running uvicorn directly with
    multiple workers:
        uvicorn benchmark.server.app:app --workers 4
    """
    import uvicorn

    uvicorn.run(app, host=host, port=port)
66
+
67
+
68
if __name__ == "__main__":
    import argparse

    # main() already accepts a host parameter; expose it on the CLI too so
    # direct execution can bind to a specific interface (default unchanged).
    parser = argparse.ArgumentParser(description="Benchmark environment server")
    parser.add_argument("--host", type=str, default="0.0.0.0", help="Host address to bind to")
    parser.add_argument("--port", type=int, default=8000, help="Port to listen on")
    args = parser.parse_args()
    main(host=args.host, port=args.port)
server/benchmark_environment.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Benchmark Environment Implementation.
9
+
10
+ A test environment for benchmarking infrastructure and concurrency.
11
+ Actions specify how many seconds to wait, allowing testing of parallel execution.
12
+ """
13
+
14
+ import asyncio
15
+ import hashlib
16
+ import os
17
+ import socket
18
+ import time
19
+ from uuid import uuid4
20
+
21
+ from openenv.core.env_server.interfaces import Environment
22
+ from openenv.core.env_server.types import State
23
+
24
+ from models import BenchmarkAction, BenchmarkObservation
25
+
26
+
27
+ def _get_host_url() -> str:
28
+ """Get the host URL for this server."""
29
+ hostname = socket.gethostname()
30
+ port = os.environ.get("PORT", "8000")
31
+ # Try to get the actual IP if possible
32
+ try:
33
+ ip = socket.gethostbyname(hostname)
34
+ except socket.gaierror:
35
+ ip = "127.0.0.1"
36
+ return f"http://{ip}:{port}"
37
+
38
+
39
class BenchmarkEnvironment(Environment):
    """Benchmark environment for testing concurrency and infrastructure.

    Each action asks the server to sleep for a number of seconds, which makes
    the environment useful for probing parallel execution and concurrency
    limits. Observations carry identity information (host_url, pid,
    session_hash) so callers can tell which server instance answered.

    Example:
        >>> env = BenchmarkEnvironment()
        >>> obs = env.reset()
        >>> print(obs.host_url)       # "http://192.168.1.1:8000"
        >>> print(obs.pid)            # 12345
        >>> print(obs.session_hash)   # "a1b2c3d4..."
        >>>
        >>> obs = env.step(BenchmarkAction(wait_seconds=2.0))
        >>> print(obs.waited_seconds) # 2.0
    """

    def __init__(self):
        """Capture this server's identity and start a fresh episode."""
        self._pid = os.getpid()
        self._host_url = _get_host_url()
        # 16-char hash uniquely identifying this server process/session.
        seed = f"{uuid4()}-{time.time()}-{os.getpid()}".encode()
        self._session_hash = hashlib.sha256(seed).hexdigest()[:16]
        self._state = State(episode_id=str(uuid4()), step_count=0)

    def _make_observation(
        self, waited_seconds: float = 0.0, done: bool = False, reward: float = 0.0
    ) -> BenchmarkObservation:
        """Build an observation stamped with the current server identity."""
        return BenchmarkObservation(
            host_url=self._host_url,
            pid=self._pid,
            session_hash=self._session_hash,
            waited_seconds=waited_seconds,
            timestamp=time.time(),
            done=done,
            reward=reward,
        )

    def reset(self) -> BenchmarkObservation:
        """Start a new episode.

        Returns:
            BenchmarkObservation with server identity info
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        return self._make_observation(waited_seconds=0.0, done=False, reward=0.0)

    def step(self, action: BenchmarkAction) -> BenchmarkObservation:  # type: ignore[override]
        """Block for the requested number of seconds, then report timing.

        Args:
            action: BenchmarkAction containing wait_seconds

        Returns:
            BenchmarkObservation with server identity and timing info
        """
        self._state.step_count += 1
        seconds = max(0.0, action.wait_seconds)
        if seconds > 0:
            # Synchronous sleep - for the async version, use step_async.
            time.sleep(seconds)
        # Inverse-time reward: faster responses score higher.
        return self._make_observation(
            waited_seconds=seconds, done=False, reward=1.0 / (1.0 + seconds)
        )

    async def step_async(self, action: BenchmarkAction) -> BenchmarkObservation:
        """Async variant of step; uses asyncio.sleep so requests can overlap.

        Args:
            action: BenchmarkAction containing wait_seconds

        Returns:
            BenchmarkObservation with server identity and timing info
        """
        self._state.step_count += 1
        seconds = max(0.0, action.wait_seconds)
        if seconds > 0:
            await asyncio.sleep(seconds)
        return self._make_observation(
            waited_seconds=seconds, done=False, reward=1.0 / (1.0 + seconds)
        )

    @property
    def state(self) -> State:
        """Current environment State (episode_id and step_count)."""
        return self._state
server/requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ openenv[core]>=0.2.0
2
+ fastapi>=0.115.0
3
+ uvicorn>=0.24.0
4
+
5
+
6
+
test_concurrency.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test script for benchmark environment concurrency.
4
+
5
+ Run the server first:
6
+ cd benchmark && uvicorn server.app:app --reload --port 8000
7
+
8
+ Then run this script:
9
+ python test_concurrency.py --requests 10 --wait 1.0
10
+ """
11
+
12
+ import argparse
13
+ import asyncio
14
+ import time
15
+
16
+ import httpx
17
+
18
+
19
+ BASE_URL = "http://localhost:8000"
20
+
21
+
22
async def reset(client: httpx.AsyncClient) -> dict:
    """POST /reset and return the decoded JSON payload."""
    resp = await client.post(f"{BASE_URL}/reset")
    resp.raise_for_status()
    return resp.json()
27
+
28
+
29
async def step(client: httpx.AsyncClient, wait_seconds: float) -> dict:
    """POST /step with the requested wait time and return the JSON payload."""
    resp = await client.post(
        f"{BASE_URL}/step",
        json={"action": {"wait_seconds": wait_seconds}},
    )
    resp.raise_for_status()
    return resp.json()
37
+
38
+
39
async def timed_request(client: httpx.AsyncClient, wait_seconds: float, request_id: int) -> dict:
    """Run one step request and report its wall-clock timing plus server identity."""
    t0 = time.perf_counter()
    payload = await step(client, wait_seconds)
    elapsed = time.perf_counter() - t0

    obs = payload["observation"]
    return {
        "request_id": request_id,
        "wait_requested": wait_seconds,
        "elapsed": elapsed,
        "pid": obs["pid"],
        "session_hash": obs["session_hash"],
    }
53
+
54
+
55
async def test_concurrent(num_requests: int, wait_seconds: float) -> dict:
    """Fire ``num_requests`` concurrent steps and return aggregate timing stats."""
    async with httpx.AsyncClient(timeout=60.0) as client:
        # Reset first so the server reports its identity.
        obs = (await reset(client))["observation"]
        print(f"Server: {obs['host_url']} | PID: {obs['pid']} | Session: {obs['session_hash']}")
        print(f"Running {num_requests} concurrent requests, each waiting {wait_seconds}s...")

        t0 = time.perf_counter()

        # Launch every request at once and wait for them all.
        results = await asyncio.gather(
            *(timed_request(client, wait_seconds, i) for i in range(num_requests))
        )

        total_time = time.perf_counter() - t0
        avg_time = sum(r["elapsed"] for r in results) / len(results)

        return {
            "num_requests": num_requests,
            "wait_seconds": wait_seconds,
            "total_time": total_time,
            "avg_time": avg_time,
        }
79
+
80
+
81
async def main():
    """Parse CLI args, run the concurrency test, and print a summary."""
    parser = argparse.ArgumentParser(description="Test benchmark environment concurrency")
    parser.add_argument("--requests", "-n", type=int, default=10, help="Number of concurrent requests")
    parser.add_argument("--wait", "-w", type=float, default=1.0, help="Wait time per request (seconds)")
    parser.add_argument("--url", "-u", type=str, default="http://localhost:8000", help="Server URL")
    args = parser.parse_args()

    # The request helpers read the module-level BASE_URL.
    global BASE_URL
    BASE_URL = args.url

    result = await test_concurrent(args.requests, args.wait)

    print(f"\nTotal time: {result['total_time']:.3f}s")
    print(f"Avg time: {result['avg_time']:.3f}s")


if __name__ == "__main__":
    asyncio.run(main())