Upload folder using huggingface_hub
Browse files
- Dockerfile +12 -9
- README.md +102 -44
- client.py +5 -5
- models.py +6 -5
- pyproject.toml +2 -4
- rewards.py +9 -3
- server/app.py +2 -2
- server/environment.py +24 -4
- server/run_local.sh +1 -1
- uv.lock +6 -6
Dockerfile
CHANGED
|
@@ -17,7 +17,6 @@ WORKDIR /app
|
|
| 17 |
|
| 18 |
# Build argument to control whether we're building standalone or in-repo
|
| 19 |
ARG BUILD_MODE=in-repo
|
| 20 |
-
ARG ENV_NAME=textarena
|
| 21 |
|
| 22 |
# Copy environment code (always at root of build context)
|
| 23 |
COPY . /app/env
|
|
@@ -33,16 +32,14 @@ RUN if ! command -v uv >/dev/null 2>&1; then \
|
|
| 33 |
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 34 |
fi
|
| 35 |
|
| 36 |
-
# Install
|
| 37 |
-
# Also install git for building from git repos
|
| 38 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 39 |
-
libgl1 \
|
| 40 |
-
libglib2.0-0 \
|
| 41 |
git \
|
| 42 |
&& rm -rf /var/lib/apt/lists/*
|
| 43 |
-
|
| 44 |
# Install dependencies using uv sync
|
| 45 |
-
#
|
|
|
|
| 46 |
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 47 |
if [ -f uv.lock ]; then \
|
| 48 |
uv sync --frozen --no-install-project --no-editable; \
|
|
@@ -62,6 +59,12 @@ FROM ${BASE_IMAGE}
|
|
| 62 |
|
| 63 |
WORKDIR /app
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
# Copy the virtual environment from builder
|
| 66 |
COPY --from=builder /app/env/.venv /app/.venv
|
| 67 |
|
|
@@ -74,9 +77,9 @@ ENV PATH="/app/.venv/bin:$PATH"
|
|
| 74 |
# Set PYTHONPATH so imports work correctly
|
| 75 |
ENV PYTHONPATH="/app/env:$PYTHONPATH"
|
| 76 |
|
| 77 |
-
# Health check
|
| 78 |
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 79 |
-
CMD
|
| 80 |
|
| 81 |
# Run the FastAPI server
|
| 82 |
# The module path is constructed to work with the /app/env structure
|
|
|
|
| 17 |
|
| 18 |
# Build argument to control whether we're building standalone or in-repo
|
| 19 |
ARG BUILD_MODE=in-repo
|
|
|
|
| 20 |
|
| 21 |
# Copy environment code (always at root of build context)
|
| 22 |
COPY . /app/env
|
|
|
|
| 32 |
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 33 |
fi
|
| 34 |
|
| 35 |
+
# Install git for building from git repos (build-time only)
|
|
|
|
| 36 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
|
|
|
|
|
| 37 |
git \
|
| 38 |
&& rm -rf /var/lib/apt/lists/*
|
| 39 |
+
|
| 40 |
# Install dependencies using uv sync
|
| 41 |
+
# First pass: install dependencies without the project (for better caching)
|
| 42 |
+
# Second pass: install the project itself
|
| 43 |
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 44 |
if [ -f uv.lock ]; then \
|
| 45 |
uv sync --frozen --no-install-project --no-editable; \
|
|
|
|
| 59 |
|
| 60 |
WORKDIR /app
|
| 61 |
|
| 62 |
+
# Install runtime system libraries required by TextArena (cv2 needs libGL, glib)
|
| 63 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 64 |
+
libgl1 \
|
| 65 |
+
libglib2.0-0 \
|
| 66 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 67 |
+
|
| 68 |
# Copy the virtual environment from builder
|
| 69 |
COPY --from=builder /app/env/.venv /app/.venv
|
| 70 |
|
|
|
|
| 77 |
# Set PYTHONPATH so imports work correctly
|
| 78 |
ENV PYTHONPATH="/app/env:$PYTHONPATH"
|
| 79 |
|
| 80 |
+
# Health check using Python (more portable than curl/wget)
|
| 81 |
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 82 |
+
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
|
| 83 |
|
| 84 |
# Run the FastAPI server
|
| 85 |
# The module path is constructed to work with the /app/env structure
|
README.md
CHANGED
|
@@ -13,36 +13,45 @@ tags:
|
|
| 13 |
|
| 14 |
# TextArena Environment
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
## Quick Start
|
| 19 |
|
| 20 |
The simplest way to use the TextArena environment is through the `TextArenaEnv` class:
|
| 21 |
|
| 22 |
```python
|
| 23 |
-
from
|
| 24 |
|
| 25 |
try:
|
| 26 |
# Create environment from Docker image
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
print(f"Sent: '{msg}'")
|
| 39 |
-
print(f" → Echoed: '{result.observation.echoed_message}'")
|
| 40 |
-
print(f" → Length: {result.observation.message_length}")
|
| 41 |
-
print(f" → Reward: {result.reward}")
|
| 42 |
|
| 43 |
finally:
|
| 44 |
# Always clean up
|
| 45 |
-
|
| 46 |
```
|
| 47 |
|
| 48 |
That's it! The `TextArenaEnv.from_docker_image()` method handles:
|
|
@@ -118,22 +127,48 @@ The deployed space includes:
|
|
| 118 |
## Environment Details
|
| 119 |
|
| 120 |
### Action
|
|
|
|
| 121 |
**TextArenaAction**: Contains a single field
|
| 122 |
-
- `message` (str) - The message to
|
| 123 |
|
| 124 |
### Observation
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
- `
|
| 129 |
-
- `
|
| 130 |
-
- `
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
### Reward
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
-
|
| 136 |
-
- Empty message → reward: 0.0
|
| 137 |
|
| 138 |
## Advanced Usage
|
| 139 |
|
|
@@ -142,17 +177,28 @@ The reward is calculated as: `message_length × 0.1`
|
|
| 142 |
If you already have a TextArena environment server running, you can connect directly:
|
| 143 |
|
| 144 |
```python
|
| 145 |
-
from
|
| 146 |
|
| 147 |
# Connect to existing server
|
| 148 |
-
|
| 149 |
|
| 150 |
# Use as normal
|
| 151 |
-
result =
|
| 152 |
-
result =
|
|
|
|
|
|
|
|
|
|
| 153 |
```
|
| 154 |
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
## Development & Testing
|
| 158 |
|
|
@@ -160,16 +206,21 @@ Note: When connecting to an existing server, `textarenaenv.close()` will NOT sto
|
|
| 160 |
|
| 161 |
Test the environment logic directly without starting the HTTP server:
|
| 162 |
|
| 163 |
-
```
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
|
|
|
|
|
|
| 167 |
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
### Running Locally
|
| 175 |
|
|
@@ -180,24 +231,31 @@ Run the server locally for development:
|
|
| 180 |
uv venv && source .venv/bin/activate
|
| 181 |
uv pip install -e .
|
| 182 |
|
| 183 |
-
# Start the server
|
| 184 |
python -m uvicorn server.app:app --reload
|
| 185 |
```
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
## Project Structure
|
| 188 |
|
| 189 |
```
|
| 190 |
-
|
| 191 |
├── __init__.py # Module exports
|
| 192 |
├── README.md # This file
|
| 193 |
├── openenv.yaml # OpenEnv manifest
|
| 194 |
├── pyproject.toml # Project metadata and dependencies
|
| 195 |
├── uv.lock # Locked dependencies (generated)
|
| 196 |
├── client.py # TextArenaEnv client implementation
|
| 197 |
-
├── models.py # Action and
|
|
|
|
| 198 |
└── server/
|
| 199 |
├── __init__.py # Server module exports
|
| 200 |
-
├──
|
| 201 |
├── app.py # FastAPI application
|
| 202 |
└── Dockerfile # Container image definition
|
| 203 |
```
|
|
|
|
| 13 |
|
| 14 |
# TextArena Environment
|
| 15 |
|
| 16 |
+
An OpenEnv wrapper for [TextArena](https://github.com/textarena/textarena) game environments. Supports text-based games like Wordle, providing a standardized API for agent interaction.
|
| 17 |
+
|
| 18 |
+
> [!NOTE]
|
| 19 |
+
> Generic wrapper for any [TextArena](https://www.textarena.ai/docs/overview) game inside OpenEnv. This module exposes the TextArena `Env` interface through the standard HTTP server/client APIs used by other OpenEnv environments, enabling quick experimentation with the full suite of word, reasoning, and multi-agent games.
|
| 20 |
|
| 21 |
## Quick Start
|
| 22 |
|
| 23 |
The simplest way to use the TextArena environment is through the `TextArenaEnv` class:
|
| 24 |
|
| 25 |
```python
|
| 26 |
+
from textarena_env import TextArenaAction, TextArenaEnv
|
| 27 |
|
| 28 |
try:
|
| 29 |
# Create environment from Docker image
|
| 30 |
+
env = TextArenaEnv.from_docker_image("textarena-env:latest")
|
| 31 |
+
|
| 32 |
+
# Reset to start a new episode
|
| 33 |
+
result = env.reset()
|
| 34 |
+
print(f"Game prompt:\n{result.observation.prompt}")
|
| 35 |
+
|
| 36 |
+
# Play a few turns (example: Wordle guesses)
|
| 37 |
+
guesses = ["[crane]", "[slate]", "[audio]"]
|
| 38 |
+
|
| 39 |
+
for guess in guesses:
|
| 40 |
+
result = env.step(TextArenaAction(message=guess))
|
| 41 |
|
| 42 |
+
# Check messages for feedback
|
| 43 |
+
for message in result.observation.messages:
|
| 44 |
+
print(f"Response: {message.content}")
|
| 45 |
|
| 46 |
+
print(f"Reward: {result.reward}")
|
| 47 |
+
print(f"Done: {result.done}")
|
| 48 |
|
| 49 |
+
if result.done:
|
| 50 |
+
break
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
finally:
|
| 53 |
# Always clean up
|
| 54 |
+
env.close()
|
| 55 |
```
|
| 56 |
|
| 57 |
That's it! The `TextArenaEnv.from_docker_image()` method handles:
|
|
|
|
| 127 |
## Environment Details
|
| 128 |
|
| 129 |
### Action
|
| 130 |
+
|
| 131 |
**TextArenaAction**: Contains a single field
|
| 132 |
+
- `message` (str) - The message/action to send to the game
|
| 133 |
|
| 134 |
### Observation
|
| 135 |
+
|
| 136 |
+
**TextArenaObservation**: Contains the game state and response
|
| 137 |
+
|
| 138 |
+
- `prompt` (str) - Game instructions and context
|
| 139 |
+
- `messages` (List[TextArenaMessage]) - Conversation history with the game
|
| 140 |
+
- `current_player_id` (int) - ID of the current player
|
| 141 |
+
- `legal_players` (List[int]) - List of valid player IDs
|
| 142 |
+
- `info` (Dict) - Additional game metadata
|
| 143 |
+
- `reward` (float) - Reward for the current step (inherited from Observation)
|
| 144 |
+
- `done` (bool) - Whether the episode has ended (inherited from Observation)
|
| 145 |
+
|
| 146 |
+
### TextArenaMessage
|
| 147 |
+
|
| 148 |
+
Each message in the conversation has:
|
| 149 |
+
|
| 150 |
+
- `sender_id` (int) - ID of the message sender
|
| 151 |
+
- `content` (str) - The message content
|
| 152 |
+
- `category` (str) - Message type (e.g., "PROMPT", "MESSAGE")
|
| 153 |
+
|
| 154 |
+
### State
|
| 155 |
+
|
| 156 |
+
**TextArenaState**: Server-side state snapshot
|
| 157 |
+
|
| 158 |
+
- `episode_id` (str) - Unique identifier for the current episode
|
| 159 |
+
- `step_count` (int) - Number of steps taken in the current episode
|
| 160 |
+
- `env_id` (str) - The TextArena environment ID (e.g., "Wordle-v0")
|
| 161 |
+
- `num_players` (int) - Number of players in the game
|
| 162 |
+
- `max_turns` (Optional[int]) - Maximum turns allowed
|
| 163 |
+
- `turn` (int) - Current turn number
|
| 164 |
+
- `last_reward` (float) - Most recent reward
|
| 165 |
+
- `last_info` (Dict) - Most recent info dictionary
|
| 166 |
+
- `raw_state` (Dict) - Raw TextArena state snapshot
|
| 167 |
|
| 168 |
### Reward
|
| 169 |
+
|
| 170 |
+
Rewards are determined by the underlying TextArena game. For example:
|
| 171 |
+
- **Wordle-v0**: Positive reward for winning, plus intermediate reward signals for green/yellow letter matches
|
|
|
|
| 172 |
|
| 173 |
## Advanced Usage
|
| 174 |
|
|
|
|
| 177 |
If you already have a TextArena environment server running, you can connect directly:
|
| 178 |
|
| 179 |
```python
|
| 180 |
+
from textarena_env import TextArenaEnv, TextArenaAction
|
| 181 |
|
| 182 |
# Connect to existing server
|
| 183 |
+
env = TextArenaEnv(base_url="<ENV_HTTP_URL_HERE>")
|
| 184 |
|
| 185 |
# Use as normal
|
| 186 |
+
result = env.reset()
|
| 187 |
+
result = env.step(TextArenaAction(message="[crane]"))
|
| 188 |
+
|
| 189 |
+
# Close connection (does NOT stop the server)
|
| 190 |
+
env.close()
|
| 191 |
```
|
| 192 |
|
| 193 |
+
### Environment Configuration
|
| 194 |
+
|
| 195 |
+
The server supports configuration via environment variables:
|
| 196 |
+
|
| 197 |
+
- `TEXTARENA_ENV_ID` - Game to load (default: "Wordle-v0")
|
| 198 |
+
- `TEXTARENA_NUM_PLAYERS` - Number of players (default: 1)
|
| 199 |
+
- `TEXTARENA_MAX_TURNS` - Maximum turns per episode
|
| 200 |
+
- `TEXTARENA_DOWNLOAD_NLTK` - Download NLTK data (default: "1")
|
| 201 |
+
- `TEXTARENA_KW_*` - Pass additional kwargs to TextArena (e.g., `TEXTARENA_KW_difficulty=hard`)
|
| 202 |
|
| 203 |
## Development & Testing
|
| 204 |
|
|
|
|
| 206 |
|
| 207 |
Test the environment logic directly without starting the HTTP server:
|
| 208 |
|
| 209 |
+
```python
|
| 210 |
+
from textarena_env.server.environment import TextArenaEnvironment
|
| 211 |
+
from textarena_env.models import TextArenaAction
|
| 212 |
+
|
| 213 |
+
# Create environment directly
|
| 214 |
+
env = TextArenaEnvironment(env_id="Wordle-v0", num_players=1)
|
| 215 |
|
| 216 |
+
# Test reset
|
| 217 |
+
obs = env.reset()
|
| 218 |
+
print(f"Prompt: {obs.prompt}")
|
| 219 |
+
|
| 220 |
+
# Test step
|
| 221 |
+
obs = env.step(TextArenaAction(message="[crane]"))
|
| 222 |
+
print(f"Done: {obs.done}, Reward: {obs.reward}")
|
| 223 |
+
```
|
| 224 |
|
| 225 |
### Running Locally
|
| 226 |
|
|
|
|
| 231 |
uv venv && source .venv/bin/activate
|
| 232 |
uv pip install -e .
|
| 233 |
|
| 234 |
+
# Start the server
|
| 235 |
python -m uvicorn server.app:app --reload
|
| 236 |
```
|
| 237 |
|
| 238 |
+
Or using the CLI entry point:
|
| 239 |
+
|
| 240 |
+
```bash
|
| 241 |
+
uv run --project . server --port 8000
|
| 242 |
+
```
|
| 243 |
+
|
| 244 |
## Project Structure
|
| 245 |
|
| 246 |
```
|
| 247 |
+
textarena_env/
|
| 248 |
├── __init__.py # Module exports
|
| 249 |
├── README.md # This file
|
| 250 |
├── openenv.yaml # OpenEnv manifest
|
| 251 |
├── pyproject.toml # Project metadata and dependencies
|
| 252 |
├── uv.lock # Locked dependencies (generated)
|
| 253 |
├── client.py # TextArenaEnv client implementation
|
| 254 |
+
├── models.py # Action, Observation, and State models
|
| 255 |
+
├── rewards.py # Reward provider utilities
|
| 256 |
└── server/
|
| 257 |
├── __init__.py # Server module exports
|
| 258 |
+
├── environment.py # Core TextArenaEnvironment implementation
|
| 259 |
├── app.py # FastAPI application
|
| 260 |
└── Dockerfile # Container image definition
|
| 261 |
```
|
client.py
CHANGED
|
@@ -35,13 +35,13 @@ class TextArenaEnv(EnvClient[TextArenaAction, TextArenaObservation, TextArenaSta
|
|
| 35 |
|
| 36 |
Example:
|
| 37 |
>>> # Connect to a running server
|
| 38 |
-
>>> client = TextArenaEnv(base_url="
|
| 39 |
>>> result = client.reset()
|
| 40 |
-
>>> print(result.observation.
|
| 41 |
>>>
|
| 42 |
-
>>> # Send
|
| 43 |
-
>>> result = client.step(TextArenaAction(message="
|
| 44 |
-
>>> print(result.observation.
|
| 45 |
>>> print(result.reward)
|
| 46 |
|
| 47 |
Example with Docker:
|
|
|
|
| 35 |
|
| 36 |
Example:
|
| 37 |
>>> # Connect to a running server
|
| 38 |
+
>>> client = TextArenaEnv(base_url="<ENV_HTTP_URL_HERE>")
|
| 39 |
>>> result = client.reset()
|
| 40 |
+
>>> print(result.observation.prompt)
|
| 41 |
>>>
|
| 42 |
+
>>> # Send an action
|
| 43 |
+
>>> result = client.step(TextArenaAction(message="[crane]"))
|
| 44 |
+
>>> print(result.observation.messages)
|
| 45 |
>>> print(result.reward)
|
| 46 |
|
| 47 |
Example with Docker:
|
models.py
CHANGED
|
@@ -7,15 +7,15 @@
|
|
| 7 |
"""
|
| 8 |
Data models for the TextArena Environment.
|
| 9 |
|
| 10 |
-
|
|
|
|
| 11 |
"""
|
| 12 |
|
| 13 |
from __future__ import annotations
|
| 14 |
|
| 15 |
-
from pydantic import Field
|
| 16 |
from typing import Any, Dict, List, Optional
|
| 17 |
|
| 18 |
-
from pydantic import BaseModel, Field
|
| 19 |
|
| 20 |
from openenv.core.env_server.types import Action, Observation, State
|
| 21 |
|
|
@@ -43,10 +43,12 @@ class TextArenaObservation(Observation):
|
|
| 43 |
legal_players: List[int] = Field(default_factory=list)
|
| 44 |
info: Dict[str, Any] = Field(default_factory=dict)
|
| 45 |
|
| 46 |
-
|
| 47 |
class TextArenaState(State):
|
| 48 |
"""Structured state snapshot for the server."""
|
| 49 |
|
|
|
|
|
|
|
| 50 |
env_id: str
|
| 51 |
num_players: int
|
| 52 |
max_turns: Optional[int] = None
|
|
@@ -54,4 +56,3 @@ class TextArenaState(State):
|
|
| 54 |
last_reward: float = 0.0
|
| 55 |
last_info: Dict[str, Any] = Field(default_factory=dict)
|
| 56 |
raw_state: Dict[str, Any] = Field(default_factory=dict)
|
| 57 |
-
|
|
|
|
| 7 |
"""
|
| 8 |
Data models for the TextArena Environment.
|
| 9 |
|
| 10 |
+
This module defines the action, observation, and state models for interacting
|
| 11 |
+
with TextArena game environments (e.g., Wordle-v0).
|
| 12 |
"""
|
| 13 |
|
| 14 |
from __future__ import annotations
|
| 15 |
|
| 16 |
+
from pydantic import BaseModel, Field
|
| 17 |
from typing import Any, Dict, List, Optional
|
| 18 |
|
|
|
|
| 19 |
|
| 20 |
from openenv.core.env_server.types import Action, Observation, State
|
| 21 |
|
|
|
|
| 43 |
legal_players: List[int] = Field(default_factory=list)
|
| 44 |
info: Dict[str, Any] = Field(default_factory=dict)
|
| 45 |
|
| 46 |
+
|
| 47 |
class TextArenaState(State):
|
| 48 |
"""Structured state snapshot for the server."""
|
| 49 |
|
| 50 |
+
episode_id: Optional[str] = None
|
| 51 |
+
step_count: int = 0
|
| 52 |
env_id: str
|
| 53 |
num_players: int
|
| 54 |
max_turns: Optional[int] = None
|
|
|
|
| 56 |
last_reward: float = 0.0
|
| 57 |
last_info: Dict[str, Any] = Field(default_factory=dict)
|
| 58 |
raw_state: Dict[str, Any] = Field(default_factory=dict)
|
|
|
pyproject.toml
CHANGED
|
@@ -40,13 +40,11 @@ dev = [
|
|
| 40 |
|
| 41 |
[project.scripts]
|
| 42 |
# Server entry point - enables running via: uv run --project . server
|
| 43 |
-
# or: python -m
|
| 44 |
-
server = "
|
| 45 |
|
| 46 |
[tool.setuptools]
|
| 47 |
# Explicitly list packages - "textarena_env" maps to current dir
|
| 48 |
packages = ["textarena_env", "textarena_env.server"]
|
| 49 |
package-dir = {"textarena_env" = ".", "textarena_env.server" = "server"}
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
|
|
|
| 40 |
|
| 41 |
[project.scripts]
|
| 42 |
# Server entry point - enables running via: uv run --project . server
|
| 43 |
+
# or: python -m textarena_env.server.app
|
| 44 |
+
server = "textarena_env.server.app:main"
|
| 45 |
|
| 46 |
[tool.setuptools]
|
| 47 |
# Explicitly list packages - "textarena_env" maps to current dir
|
| 48 |
packages = ["textarena_env", "textarena_env.server"]
|
| 49 |
package-dir = {"textarena_env" = ".", "textarena_env.server" = "server"}
|
| 50 |
|
|
|
|
|
|
rewards.py
CHANGED
|
@@ -17,7 +17,9 @@ class RewardProvider(Protocol):
|
|
| 17 |
def reset(self) -> None:
|
| 18 |
"""Clear any internal state before a new episode."""
|
| 19 |
|
| 20 |
-
def compute(
|
|
|
|
|
|
|
| 21 |
"""Return a mapping of reward names to float values for the step."""
|
| 22 |
|
| 23 |
|
|
@@ -92,12 +94,16 @@ class _WordleRewardProvider:
|
|
| 92 |
def reset(self) -> None:
|
| 93 |
self._guess_history.clear()
|
| 94 |
|
| 95 |
-
def compute(
|
|
|
|
|
|
|
| 96 |
guess = extract_guess(action.message)
|
| 97 |
feedback = extract_wordle_feedback(observation)
|
| 98 |
|
| 99 |
normalized_guess = guess if guess and guess != "[dunno]" else ""
|
| 100 |
-
previous_occurrences =
|
|
|
|
|
|
|
| 101 |
|
| 102 |
green_score = 0.0
|
| 103 |
yellow_score = 0.0
|
|
|
|
| 17 |
def reset(self) -> None:
|
| 18 |
"""Clear any internal state before a new episode."""
|
| 19 |
|
| 20 |
+
def compute(
|
| 21 |
+
self, *, action: TextArenaAction, observation: TextArenaObservation
|
| 22 |
+
) -> Dict[str, float]:
|
| 23 |
"""Return a mapping of reward names to float values for the step."""
|
| 24 |
|
| 25 |
|
|
|
|
| 94 |
def reset(self) -> None:
|
| 95 |
self._guess_history.clear()
|
| 96 |
|
| 97 |
+
def compute(
|
| 98 |
+
self, *, action: TextArenaAction, observation: TextArenaObservation
|
| 99 |
+
) -> Dict[str, float]:
|
| 100 |
guess = extract_guess(action.message)
|
| 101 |
feedback = extract_wordle_feedback(observation)
|
| 102 |
|
| 103 |
normalized_guess = guess if guess and guess != "[dunno]" else ""
|
| 104 |
+
previous_occurrences = (
|
| 105 |
+
self._guess_history.get(normalized_guess, 0) if normalized_guess else 0
|
| 106 |
+
)
|
| 107 |
|
| 108 |
green_score = 0.0
|
| 109 |
yellow_score = 0.0
|
server/app.py
CHANGED
|
@@ -71,7 +71,7 @@ def main(host: str = "0.0.0.0", port: int = 8000):
|
|
| 71 |
This function enables running the server without Docker:
|
| 72 |
uv run --project . server
|
| 73 |
uv run --project . server --port 8001
|
| 74 |
-
python -m
|
| 75 |
|
| 76 |
Args:
|
| 77 |
host: Host address to bind to (default: "0.0.0.0")
|
|
@@ -79,7 +79,7 @@ def main(host: str = "0.0.0.0", port: int = 8000):
|
|
| 79 |
|
| 80 |
For production deployments, consider using uvicorn directly with
|
| 81 |
multiple workers:
|
| 82 |
-
uvicorn
|
| 83 |
"""
|
| 84 |
import uvicorn
|
| 85 |
|
|
|
|
| 71 |
This function enables running the server without Docker:
|
| 72 |
uv run --project . server
|
| 73 |
uv run --project . server --port 8001
|
| 74 |
+
python -m textarena_env.server.app
|
| 75 |
|
| 76 |
Args:
|
| 77 |
host: Host address to bind to (default: "0.0.0.0")
|
|
|
|
| 79 |
|
| 80 |
For production deployments, consider using uvicorn directly with
|
| 81 |
multiple workers:
|
| 82 |
+
uvicorn textarena_env.server.app:app --workers 4
|
| 83 |
"""
|
| 84 |
import uvicorn
|
| 85 |
|
server/environment.py
CHANGED
|
@@ -38,6 +38,17 @@ except ImportError:
|
|
| 38 |
|
| 39 |
_TEXTARENA_MODULE: Any | None = None
|
| 40 |
_TEXTARENA_IMPORT_ERROR: Exception | None = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
|
| 43 |
def _import_textarena() -> Any:
|
|
@@ -85,8 +96,7 @@ class TextArenaEnvironment(Environment):
|
|
| 85 |
ta = _import_textarena()
|
| 86 |
|
| 87 |
if download_nltk:
|
| 88 |
-
|
| 89 |
-
nltk.download("averaged_perceptron_tagger_eng", quiet=True)
|
| 90 |
|
| 91 |
self.env_id = env_id
|
| 92 |
self.num_players = num_players
|
|
@@ -104,10 +114,20 @@ class TextArenaEnvironment(Environment):
|
|
| 104 |
self._reward_providers: List[RewardProvider] = build_reward_providers(env_id)
|
| 105 |
self._last_reward_signals: Dict[str, float] = {}
|
| 106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
# ------------------------------------------------------------------
|
| 108 |
# Environment interface
|
| 109 |
# ------------------------------------------------------------------
|
| 110 |
-
def reset(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
# TextArena observation wrappers (LLMObservationWrapper, etc.) accumulate
|
| 112 |
# observations in self.full_observations across resets. Since we can't modify TextArena,
|
| 113 |
# we need to manually clear this state to prevent history accumulation.
|
|
@@ -125,7 +145,7 @@ class TextArenaEnvironment(Environment):
|
|
| 125 |
for provider in self._reward_providers:
|
| 126 |
provider.reset()
|
| 127 |
|
| 128 |
-
self._state.episode_id = str(uuid4())
|
| 129 |
self._state.step_count = 0
|
| 130 |
self._state.turn = 0
|
| 131 |
self._state.last_reward = 0.0
|
|
|
|
| 38 |
|
| 39 |
_TEXTARENA_MODULE: Any | None = None
|
| 40 |
_TEXTARENA_IMPORT_ERROR: Exception | None = None
|
| 41 |
+
_NLTK_DOWNLOADED: bool = False
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _ensure_nltk_data() -> None:
|
| 45 |
+
"""Download NLTK data once per process."""
|
| 46 |
+
global _NLTK_DOWNLOADED
|
| 47 |
+
if _NLTK_DOWNLOADED:
|
| 48 |
+
return
|
| 49 |
+
nltk.download("words", quiet=True)
|
| 50 |
+
nltk.download("averaged_perceptron_tagger_eng", quiet=True)
|
| 51 |
+
_NLTK_DOWNLOADED = True
|
| 52 |
|
| 53 |
|
| 54 |
def _import_textarena() -> Any:
|
|
|
|
| 96 |
ta = _import_textarena()
|
| 97 |
|
| 98 |
if download_nltk:
|
| 99 |
+
_ensure_nltk_data()
|
|
|
|
| 100 |
|
| 101 |
self.env_id = env_id
|
| 102 |
self.num_players = num_players
|
|
|
|
| 114 |
self._reward_providers: List[RewardProvider] = build_reward_providers(env_id)
|
| 115 |
self._last_reward_signals: Dict[str, float] = {}
|
| 116 |
|
| 117 |
+
# Initialize environment state - TextArena envs require reset() to be called
|
| 118 |
+
# before step() can be used, as the internal state object isn't created until reset.
|
| 119 |
+
# This ensures the environment is always in a valid state after construction.
|
| 120 |
+
self._ta_env.reset(num_players=self.num_players)
|
| 121 |
+
|
| 122 |
# ------------------------------------------------------------------
|
| 123 |
# Environment interface
|
| 124 |
# ------------------------------------------------------------------
|
| 125 |
+
def reset(
|
| 126 |
+
self,
|
| 127 |
+
seed: Optional[int] = None,
|
| 128 |
+
episode_id: Optional[str] = None,
|
| 129 |
+
**kwargs: Any,
|
| 130 |
+
) -> TextArenaObservation:
|
| 131 |
# TextArena observation wrappers (LLMObservationWrapper, etc.) accumulate
|
| 132 |
# observations in self.full_observations across resets. Since we can't modify TextArena,
|
| 133 |
# we need to manually clear this state to prevent history accumulation.
|
|
|
|
| 145 |
for provider in self._reward_providers:
|
| 146 |
provider.reset()
|
| 147 |
|
| 148 |
+
self._state.episode_id = episode_id if episode_id is not None else str(uuid4())
|
| 149 |
self._state.step_count = 0
|
| 150 |
self._state.turn = 0
|
| 151 |
self._state.last_reward = 0.0
|
server/run_local.sh
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
export TEXTARENA_ENV_ID="
|
| 2 |
export TEXTARENA_NUM_PLAYERS=1
|
| 3 |
|
| 4 |
# Run the server
|
|
|
|
| 1 |
+
export TEXTARENA_ENV_ID="Wordle-v0"
|
| 2 |
export TEXTARENA_NUM_PLAYERS=1
|
| 3 |
|
| 4 |
# Run the server
|
uv.lock
CHANGED
|
@@ -301,11 +301,11 @@ wheels = [
|
|
| 301 |
|
| 302 |
[[package]]
|
| 303 |
name = "filelock"
|
| 304 |
-
version = "3.20.
|
| 305 |
source = { registry = "https://pypi.org/simple" }
|
| 306 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 307 |
wheels = [
|
| 308 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 309 |
]
|
| 310 |
|
| 311 |
[[package]]
|
|
@@ -1206,11 +1206,11 @@ wheels = [
|
|
| 1206 |
|
| 1207 |
[[package]]
|
| 1208 |
name = "urllib3"
|
| 1209 |
-
version = "2.6.
|
| 1210 |
source = { registry = "https://pypi.org/simple" }
|
| 1211 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 1212 |
wheels = [
|
| 1213 |
-
{ url = "https://files.pythonhosted.org/packages/
|
| 1214 |
]
|
| 1215 |
|
| 1216 |
[[package]]
|
|
|
|
| 301 |
|
| 302 |
[[package]]
|
| 303 |
name = "filelock"
|
| 304 |
+
version = "3.20.3"
|
| 305 |
source = { registry = "https://pypi.org/simple" }
|
| 306 |
+
sdist = { url = "https://files.pythonhosted.org/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485, upload-time = "2026-01-09T17:55:05.421Z" }
|
| 307 |
wheels = [
|
| 308 |
+
{ url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" },
|
| 309 |
]
|
| 310 |
|
| 311 |
[[package]]
|
|
|
|
| 1206 |
|
| 1207 |
[[package]]
|
| 1208 |
name = "urllib3"
|
| 1209 |
+
version = "2.6.3"
|
| 1210 |
source = { registry = "https://pypi.org/simple" }
|
| 1211 |
+
sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
|
| 1212 |
wheels = [
|
| 1213 |
+
{ url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
|
| 1214 |
]
|
| 1215 |
|
| 1216 |
[[package]]
|