sergiopaniego HF Staff committed on
Commit
0f3ecd2
·
verified ·
1 Parent(s): 8418d64

Upload folder using huggingface_hub

Browse files
Files changed (10) hide show
  1. Dockerfile +12 -9
  2. README.md +102 -44
  3. client.py +5 -5
  4. models.py +6 -5
  5. pyproject.toml +2 -4
  6. rewards.py +9 -3
  7. server/app.py +2 -2
  8. server/environment.py +24 -4
  9. server/run_local.sh +1 -1
  10. uv.lock +6 -6
Dockerfile CHANGED
@@ -17,7 +17,6 @@ WORKDIR /app
17
 
18
  # Build argument to control whether we're building standalone or in-repo
19
  ARG BUILD_MODE=in-repo
20
- ARG ENV_NAME=textarena
21
 
22
  # Copy environment code (always at root of build context)
23
  COPY . /app/env
@@ -33,16 +32,14 @@ RUN if ! command -v uv >/dev/null 2>&1; then \
33
  mv /root/.local/bin/uvx /usr/local/bin/uvx; \
34
  fi
35
 
36
- # Install system libraries required by TextArena (cv2 needs libGL, glib)
37
- # Also install git for building from git repos
38
  RUN apt-get update && apt-get install -y --no-install-recommends \
39
- libgl1 \
40
- libglib2.0-0 \
41
  git \
42
  && rm -rf /var/lib/apt/lists/*
43
-
44
  # Install dependencies using uv sync
45
- # If uv.lock exists, use it; otherwise resolve on the fly
 
46
  RUN --mount=type=cache,target=/root/.cache/uv \
47
  if [ -f uv.lock ]; then \
48
  uv sync --frozen --no-install-project --no-editable; \
@@ -62,6 +59,12 @@ FROM ${BASE_IMAGE}
62
 
63
  WORKDIR /app
64
 
 
 
 
 
 
 
65
  # Copy the virtual environment from builder
66
  COPY --from=builder /app/env/.venv /app/.venv
67
 
@@ -74,9 +77,9 @@ ENV PATH="/app/.venv/bin:$PATH"
74
  # Set PYTHONPATH so imports work correctly
75
  ENV PYTHONPATH="/app/env:$PYTHONPATH"
76
 
77
- # Health check
78
  HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
79
- CMD curl -f http://localhost:8000/health || exit 1
80
 
81
  # Run the FastAPI server
82
  # The module path is constructed to work with the /app/env structure
 
17
 
18
  # Build argument to control whether we're building standalone or in-repo
19
  ARG BUILD_MODE=in-repo
 
20
 
21
  # Copy environment code (always at root of build context)
22
  COPY . /app/env
 
32
  mv /root/.local/bin/uvx /usr/local/bin/uvx; \
33
  fi
34
 
35
+ # Install git for building from git repos (build-time only)
 
36
  RUN apt-get update && apt-get install -y --no-install-recommends \
 
 
37
  git \
38
  && rm -rf /var/lib/apt/lists/*
39
+
40
  # Install dependencies using uv sync
41
+ # First pass: install dependencies without the project (for better caching)
42
+ # Second pass: install the project itself
43
  RUN --mount=type=cache,target=/root/.cache/uv \
44
  if [ -f uv.lock ]; then \
45
  uv sync --frozen --no-install-project --no-editable; \
 
59
 
60
  WORKDIR /app
61
 
62
+ # Install runtime system libraries required by TextArena (cv2 needs libGL, glib)
63
+ RUN apt-get update && apt-get install -y --no-install-recommends \
64
+ libgl1 \
65
+ libglib2.0-0 \
66
+ && rm -rf /var/lib/apt/lists/*
67
+
68
  # Copy the virtual environment from builder
69
  COPY --from=builder /app/env/.venv /app/.venv
70
 
 
77
  # Set PYTHONPATH so imports work correctly
78
  ENV PYTHONPATH="/app/env:$PYTHONPATH"
79
 
80
+ # Health check using Python (more portable than curl/wget)
81
  HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
82
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
83
 
84
  # Run the FastAPI server
85
  # The module path is constructed to work with the /app/env structure
README.md CHANGED
@@ -13,36 +13,45 @@ tags:
13
 
14
  # TextArena Environment
15
 
16
- A simple test environment that echoes back messages. Perfect for testing the env APIs as well as demonstrating environment usage patterns.
 
 
 
17
 
18
  ## Quick Start
19
 
20
  The simplest way to use the TextArena environment is through the `TextArenaEnv` class:
21
 
22
  ```python
23
- from textarena import TextArenaAction, TextArenaEnv
24
 
25
  try:
26
  # Create environment from Docker image
27
- textarenaenv = TextArenaEnv.from_docker_image("textarena-env:latest")
 
 
 
 
 
 
 
 
 
 
28
 
29
- # Reset
30
- result = textArenaEnv.reset()
31
- print(f"Reset: {result.observation.echoed_message}")
32
 
33
- # Send multiple messages
34
- messages = ["Hello, World!", "Testing echo", "Final message"]
35
 
36
- for msg in messages:
37
- result = textArenaEnv.step(TextArenaAction(message=msg))
38
- print(f"Sent: '{msg}'")
39
- print(f" → Echoed: '{result.observation.echoed_message}'")
40
- print(f" → Length: {result.observation.message_length}")
41
- print(f" → Reward: {result.reward}")
42
 
43
  finally:
44
  # Always clean up
45
- textArenaEnv.close()
46
  ```
47
 
48
  That's it! The `TextArenaEnv.from_docker_image()` method handles:
@@ -118,22 +127,48 @@ The deployed space includes:
118
  ## Environment Details
119
 
120
  ### Action
 
121
  **TextArenaAction**: Contains a single field
122
- - `message` (str) - The message to echo back
123
 
124
  ### Observation
125
- **TextArenaObservation**: Contains the echo response and metadata
126
- - `echoed_message` (str) - The message echoed back
127
- - `message_length` (int) - Length of the message
128
- - `reward` (float) - Reward based on message length (length × 0.1)
129
- - `done` (bool) - Always False for echo environment
130
- - `metadata` (dict) - Additional info like step count
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  ### Reward
133
- The reward is calculated as: `message_length × 0.1`
134
- - "Hi" reward: 0.2
135
- - "Hello, World!" reward: 1.3
136
- - Empty message → reward: 0.0
137
 
138
  ## Advanced Usage
139
 
@@ -142,17 +177,28 @@ The reward is calculated as: `message_length × 0.1`
142
  If you already have a TextArena environment server running, you can connect directly:
143
 
144
  ```python
145
- from textarena import TextArenaEnv
146
 
147
  # Connect to existing server
148
- textarenaenv = TextArenaEnv(base_url="<ENV_HTTP_URL_HERE>")
149
 
150
  # Use as normal
151
- result = textarenaenv.reset()
152
- result = textarenaenv.step(TextArenaAction(message="Hello!"))
 
 
 
153
  ```
154
 
155
- Note: When connecting to an existing server, `textarenaenv.close()` will NOT stop the server.
 
 
 
 
 
 
 
 
156
 
157
  ## Development & Testing
158
 
@@ -160,16 +206,21 @@ Note: When connecting to an existing server, `textarenaenv.close()` will NOT sto
160
 
161
  Test the environment logic directly without starting the HTTP server:
162
 
163
- ```bash
164
- # From the server directory
165
- python3 server/textarena_environment.py
166
- ```
 
 
167
 
168
- This verifies that:
169
- - Environment resets correctly
170
- - Step executes actions properly
171
- - State tracking works
172
- - Rewards are calculated correctly
 
 
 
173
 
174
  ### Running Locally
175
 
@@ -180,24 +231,31 @@ Run the server locally for development:
180
  uv venv && source .venv/bin/activate
181
  uv pip install -e .
182
 
183
- # Start the server (use python -m to ensure venv Python is used)
184
  python -m uvicorn server.app:app --reload
185
  ```
186
 
 
 
 
 
 
 
187
  ## Project Structure
188
 
189
  ```
190
- textarena/
191
  ├── __init__.py # Module exports
192
  ├── README.md # This file
193
  ├── openenv.yaml # OpenEnv manifest
194
  ├── pyproject.toml # Project metadata and dependencies
195
  ├── uv.lock # Locked dependencies (generated)
196
  ├── client.py # TextArenaEnv client implementation
197
- ├── models.py # Action and Observation models
 
198
  └── server/
199
  ├── __init__.py # Server module exports
200
- ├── textarena_environment.py # Core environment logic
201
  ├── app.py # FastAPI application
202
  └── Dockerfile # Container image definition
203
  ```
 
13
 
14
  # TextArena Environment
15
 
16
+ An OpenEnv wrapper for [TextArena](https://github.com/textarena/textarena) game environments. Supports text-based games like Wordle, providing a standardized API for agent interaction.
17
+
18
+ > [!NOTE]
19
+ > Generic wrapper for any [TextArena](https://www.textarena.ai/docs/overview) game inside OpenEnv. This module exposes the TextArena `Env` interface through the standard HTTP server/client APIs used by other OpenEnv environments, enabling quick experimentation with the full suite of word, reasoning, and multi-agent games.
20
 
21
  ## Quick Start
22
 
23
  The simplest way to use the TextArena environment is through the `TextArenaEnv` class:
24
 
25
  ```python
26
+ from textarena_env import TextArenaAction, TextArenaEnv
27
 
28
  try:
29
  # Create environment from Docker image
30
+ env = TextArenaEnv.from_docker_image("textarena-env:latest")
31
+
32
+ # Reset to start a new episode
33
+ result = env.reset()
34
+ print(f"Game prompt:\n{result.observation.prompt}")
35
+
36
+ # Play a few turns (example: Wordle guesses)
37
+ guesses = ["[crane]", "[slate]", "[audio]"]
38
+
39
+ for guess in guesses:
40
+ result = env.step(TextArenaAction(message=guess))
41
 
42
+ # Check messages for feedback
43
+ for message in result.observation.messages:
44
+ print(f"Response: {message.content}")
45
 
46
+ print(f"Reward: {result.reward}")
47
+ print(f"Done: {result.done}")
48
 
49
+ if result.done:
50
+ break
 
 
 
 
51
 
52
  finally:
53
  # Always clean up
54
+ env.close()
55
  ```
56
 
57
  That's it! The `TextArenaEnv.from_docker_image()` method handles:
 
127
  ## Environment Details
128
 
129
  ### Action
130
+
131
  **TextArenaAction**: Contains a single field
132
+ - `message` (str) - The message/action to send to the game
133
 
134
  ### Observation
135
+
136
+ **TextArenaObservation**: Contains the game state and response
137
+
138
+ - `prompt` (str) - Game instructions and context
139
+ - `messages` (List[TextArenaMessage]) - Conversation history with the game
140
+ - `current_player_id` (int) - ID of the current player
141
+ - `legal_players` (List[int]) - List of valid player IDs
142
+ - `info` (Dict) - Additional game metadata
143
+ - `reward` (float) - Reward for the current step (inherited from Observation)
144
+ - `done` (bool) - Whether the episode has ended (inherited from Observation)
145
+
146
+ ### TextArenaMessage
147
+
148
+ Each message in the conversation has:
149
+
150
+ - `sender_id` (int) - ID of the message sender
151
+ - `content` (str) - The message content
152
+ - `category` (str) - Message type (e.g., "PROMPT", "MESSAGE")
153
+
154
+ ### State
155
+
156
+ **TextArenaState**: Server-side state snapshot
157
+
158
+ - `episode_id` (str) - Unique identifier for the current episode
159
+ - `step_count` (int) - Number of steps taken in the current episode
160
+ - `env_id` (str) - The TextArena environment ID (e.g., "Wordle-v0")
161
+ - `num_players` (int) - Number of players in the game
162
+ - `max_turns` (Optional[int]) - Maximum turns allowed
163
+ - `turn` (int) - Current turn number
164
+ - `last_reward` (float) - Most recent reward
165
+ - `last_info` (Dict) - Most recent info dictionary
166
+ - `raw_state` (Dict) - Raw TextArena state snapshot
167
 
168
  ### Reward
169
+
170
+ Rewards are determined by the underlying TextArena game. For example:
171
+ - **Wordle-v0**: Positive reward for winning, plus additional reward signals for green/yellow letter matches
 
172
 
173
  ## Advanced Usage
174
 
 
177
  If you already have a TextArena environment server running, you can connect directly:
178
 
179
  ```python
180
+ from textarena_env import TextArenaEnv, TextArenaAction
181
 
182
  # Connect to existing server
183
+ env = TextArenaEnv(base_url="<ENV_HTTP_URL_HERE>")
184
 
185
  # Use as normal
186
+ result = env.reset()
187
+ result = env.step(TextArenaAction(message="[crane]"))
188
+
189
+ # Close connection (does NOT stop the server)
190
+ env.close()
191
  ```
192
 
193
+ ### Environment Configuration
194
+
195
+ The server supports configuration via environment variables:
196
+
197
+ - `TEXTARENA_ENV_ID` - Game to load (default: "Wordle-v0")
198
+ - `TEXTARENA_NUM_PLAYERS` - Number of players (default: 1)
199
+ - `TEXTARENA_MAX_TURNS` - Maximum turns per episode
200
+ - `TEXTARENA_DOWNLOAD_NLTK` - Download NLTK data (default: "1")
201
+ - `TEXTARENA_KW_*` - Pass additional kwargs to TextArena (e.g., `TEXTARENA_KW_difficulty=hard`)
202
 
203
  ## Development & Testing
204
 
 
206
 
207
  Test the environment logic directly without starting the HTTP server:
208
 
209
+ ```python
210
+ from textarena_env.server.environment import TextArenaEnvironment
211
+ from textarena_env.models import TextArenaAction
212
+
213
+ # Create environment directly
214
+ env = TextArenaEnvironment(env_id="Wordle-v0", num_players=1)
215
 
216
+ # Test reset
217
+ obs = env.reset()
218
+ print(f"Prompt: {obs.prompt}")
219
+
220
+ # Test step
221
+ obs = env.step(TextArenaAction(message="[crane]"))
222
+ print(f"Done: {obs.done}, Reward: {obs.reward}")
223
+ ```
224
 
225
  ### Running Locally
226
 
 
231
  uv venv && source .venv/bin/activate
232
  uv pip install -e .
233
 
234
+ # Start the server
235
  python -m uvicorn server.app:app --reload
236
  ```
237
 
238
+ Or using the CLI entry point:
239
+
240
+ ```bash
241
+ uv run --project . server --port 8000
242
+ ```
243
+
244
  ## Project Structure
245
 
246
  ```
247
+ textarena_env/
248
  ├── __init__.py # Module exports
249
  ├── README.md # This file
250
  ├── openenv.yaml # OpenEnv manifest
251
  ├── pyproject.toml # Project metadata and dependencies
252
  ├── uv.lock # Locked dependencies (generated)
253
  ├── client.py # TextArenaEnv client implementation
254
+ ├── models.py # Action, Observation, and State models
255
+ ├── rewards.py # Reward provider utilities
256
  └── server/
257
  ├── __init__.py # Server module exports
258
+ ├── environment.py # Core TextArenaEnvironment implementation
259
  ├── app.py # FastAPI application
260
  └── Dockerfile # Container image definition
261
  ```
client.py CHANGED
@@ -35,13 +35,13 @@ class TextArenaEnv(EnvClient[TextArenaAction, TextArenaObservation, TextArenaSta
35
 
36
  Example:
37
  >>> # Connect to a running server
38
- >>> client = TextArenaEnv(base_url="http://localhost:8000")
39
  >>> result = client.reset()
40
- >>> print(result.observation.echoed_message)
41
  >>>
42
- >>> # Send a message
43
- >>> result = client.step(TextArenaAction(message="Hello!"))
44
- >>> print(result.observation.echoed_message)
45
  >>> print(result.reward)
46
 
47
  Example with Docker:
 
35
 
36
  Example:
37
  >>> # Connect to a running server
38
+ >>> client = TextArenaEnv(base_url="<ENV_HTTP_URL_HERE>")
39
  >>> result = client.reset()
40
+ >>> print(result.observation.prompt)
41
  >>>
42
+ >>> # Send an action
43
+ >>> result = client.step(TextArenaAction(message="[crane]"))
44
+ >>> print(result.observation.messages)
45
  >>> print(result.reward)
46
 
47
  Example with Docker:
models.py CHANGED
@@ -7,15 +7,15 @@
7
  """
8
  Data models for the TextArena Environment.
9
 
10
- The textarena environment is a simple test environment that echoes back messages.
 
11
  """
12
 
13
  from __future__ import annotations
14
 
15
- from pydantic import Field
16
  from typing import Any, Dict, List, Optional
17
 
18
- from pydantic import BaseModel, Field
19
 
20
  from openenv.core.env_server.types import Action, Observation, State
21
 
@@ -43,10 +43,12 @@ class TextArenaObservation(Observation):
43
  legal_players: List[int] = Field(default_factory=list)
44
  info: Dict[str, Any] = Field(default_factory=dict)
45
 
46
-
47
  class TextArenaState(State):
48
  """Structured state snapshot for the server."""
49
 
 
 
50
  env_id: str
51
  num_players: int
52
  max_turns: Optional[int] = None
@@ -54,4 +56,3 @@ class TextArenaState(State):
54
  last_reward: float = 0.0
55
  last_info: Dict[str, Any] = Field(default_factory=dict)
56
  raw_state: Dict[str, Any] = Field(default_factory=dict)
57
-
 
7
  """
8
  Data models for the TextArena Environment.
9
 
10
+ This module defines the action, observation, and state models for interacting
11
+ with TextArena game environments (e.g., Wordle-v0).
12
  """
13
 
14
  from __future__ import annotations
15
 
16
+ from pydantic import BaseModel, Field
17
  from typing import Any, Dict, List, Optional
18
 
 
19
 
20
  from openenv.core.env_server.types import Action, Observation, State
21
 
 
43
  legal_players: List[int] = Field(default_factory=list)
44
  info: Dict[str, Any] = Field(default_factory=dict)
45
 
46
+
47
  class TextArenaState(State):
48
  """Structured state snapshot for the server."""
49
 
50
+ episode_id: Optional[str] = None
51
+ step_count: int = 0
52
  env_id: str
53
  num_players: int
54
  max_turns: Optional[int] = None
 
56
  last_reward: float = 0.0
57
  last_info: Dict[str, Any] = Field(default_factory=dict)
58
  raw_state: Dict[str, Any] = Field(default_factory=dict)
 
pyproject.toml CHANGED
@@ -40,13 +40,11 @@ dev = [
40
 
41
  [project.scripts]
42
  # Server entry point - enables running via: uv run --project . server
43
- # or: python -m textarena.server.app
44
- server = "textarena.server.app:main"
45
 
46
  [tool.setuptools]
47
  # Explicitly list packages - "textarena_env" maps to current dir
48
  packages = ["textarena_env", "textarena_env.server"]
49
  package-dir = {"textarena_env" = ".", "textarena_env.server" = "server"}
50
 
51
-
52
-
 
40
 
41
  [project.scripts]
42
  # Server entry point - enables running via: uv run --project . server
43
+ # or: python -m textarena_env.server.app
44
+ server = "textarena_env.server.app:main"
45
 
46
  [tool.setuptools]
47
  # Explicitly list packages - "textarena_env" maps to current dir
48
  packages = ["textarena_env", "textarena_env.server"]
49
  package-dir = {"textarena_env" = ".", "textarena_env.server" = "server"}
50
 
 
 
rewards.py CHANGED
@@ -17,7 +17,9 @@ class RewardProvider(Protocol):
17
  def reset(self) -> None:
18
  """Clear any internal state before a new episode."""
19
 
20
- def compute(self, *, action: TextArenaAction, observation: TextArenaObservation) -> Dict[str, float]:
 
 
21
  """Return a mapping of reward names to float values for the step."""
22
 
23
 
@@ -92,12 +94,16 @@ class _WordleRewardProvider:
92
  def reset(self) -> None:
93
  self._guess_history.clear()
94
 
95
- def compute(self, *, action: TextArenaAction, observation: TextArenaObservation) -> Dict[str, float]:
 
 
96
  guess = extract_guess(action.message)
97
  feedback = extract_wordle_feedback(observation)
98
 
99
  normalized_guess = guess if guess and guess != "[dunno]" else ""
100
- previous_occurrences = self._guess_history.get(normalized_guess, 0) if normalized_guess else 0
 
 
101
 
102
  green_score = 0.0
103
  yellow_score = 0.0
 
17
  def reset(self) -> None:
18
  """Clear any internal state before a new episode."""
19
 
20
+ def compute(
21
+ self, *, action: TextArenaAction, observation: TextArenaObservation
22
+ ) -> Dict[str, float]:
23
  """Return a mapping of reward names to float values for the step."""
24
 
25
 
 
94
  def reset(self) -> None:
95
  self._guess_history.clear()
96
 
97
+ def compute(
98
+ self, *, action: TextArenaAction, observation: TextArenaObservation
99
+ ) -> Dict[str, float]:
100
  guess = extract_guess(action.message)
101
  feedback = extract_wordle_feedback(observation)
102
 
103
  normalized_guess = guess if guess and guess != "[dunno]" else ""
104
+ previous_occurrences = (
105
+ self._guess_history.get(normalized_guess, 0) if normalized_guess else 0
106
+ )
107
 
108
  green_score = 0.0
109
  yellow_score = 0.0
server/app.py CHANGED
@@ -71,7 +71,7 @@ def main(host: str = "0.0.0.0", port: int = 8000):
71
  This function enables running the server without Docker:
72
  uv run --project . server
73
  uv run --project . server --port 8001
74
- python -m textarena.server.app
75
 
76
  Args:
77
  host: Host address to bind to (default: "0.0.0.0")
@@ -79,7 +79,7 @@ def main(host: str = "0.0.0.0", port: int = 8000):
79
 
80
  For production deployments, consider using uvicorn directly with
81
  multiple workers:
82
- uvicorn textarena.server.app:app --workers 4
83
  """
84
  import uvicorn
85
 
 
71
  This function enables running the server without Docker:
72
  uv run --project . server
73
  uv run --project . server --port 8001
74
+ python -m textarena_env.server.app
75
 
76
  Args:
77
  host: Host address to bind to (default: "0.0.0.0")
 
79
 
80
  For production deployments, consider using uvicorn directly with
81
  multiple workers:
82
+ uvicorn textarena_env.server.app:app --workers 4
83
  """
84
  import uvicorn
85
 
server/environment.py CHANGED
@@ -38,6 +38,17 @@ except ImportError:
38
 
39
  _TEXTARENA_MODULE: Any | None = None
40
  _TEXTARENA_IMPORT_ERROR: Exception | None = None
 
 
 
 
 
 
 
 
 
 
 
41
 
42
 
43
  def _import_textarena() -> Any:
@@ -85,8 +96,7 @@ class TextArenaEnvironment(Environment):
85
  ta = _import_textarena()
86
 
87
  if download_nltk:
88
- nltk.download("words", quiet=True)
89
- nltk.download("averaged_perceptron_tagger_eng", quiet=True)
90
 
91
  self.env_id = env_id
92
  self.num_players = num_players
@@ -104,10 +114,20 @@ class TextArenaEnvironment(Environment):
104
  self._reward_providers: List[RewardProvider] = build_reward_providers(env_id)
105
  self._last_reward_signals: Dict[str, float] = {}
106
 
 
 
 
 
 
107
  # ------------------------------------------------------------------
108
  # Environment interface
109
  # ------------------------------------------------------------------
110
- def reset(self) -> TextArenaObservation:
 
 
 
 
 
111
  # TextArena observation wrappers (LLMObservationWrapper, etc.) accumulate
112
  # observations in self.full_observations across resets. Since we can't modify TextArena,
113
  # we need to manually clear this state to prevent history accumulation.
@@ -125,7 +145,7 @@ class TextArenaEnvironment(Environment):
125
  for provider in self._reward_providers:
126
  provider.reset()
127
 
128
- self._state.episode_id = str(uuid4())
129
  self._state.step_count = 0
130
  self._state.turn = 0
131
  self._state.last_reward = 0.0
 
38
 
39
  _TEXTARENA_MODULE: Any | None = None
40
  _TEXTARENA_IMPORT_ERROR: Exception | None = None
41
+ _NLTK_DOWNLOADED: bool = False
42
+
43
+
44
+ def _ensure_nltk_data() -> None:
45
+ """Download NLTK data once per process."""
46
+ global _NLTK_DOWNLOADED
47
+ if _NLTK_DOWNLOADED:
48
+ return
49
+ nltk.download("words", quiet=True)
50
+ nltk.download("averaged_perceptron_tagger_eng", quiet=True)
51
+ _NLTK_DOWNLOADED = True
52
 
53
 
54
  def _import_textarena() -> Any:
 
96
  ta = _import_textarena()
97
 
98
  if download_nltk:
99
+ _ensure_nltk_data()
 
100
 
101
  self.env_id = env_id
102
  self.num_players = num_players
 
114
  self._reward_providers: List[RewardProvider] = build_reward_providers(env_id)
115
  self._last_reward_signals: Dict[str, float] = {}
116
 
117
+ # Initialize environment state - TextArena envs require reset() to be called
118
+ # before step() can be used, as the internal state object isn't created until reset.
119
+ # This ensures the environment is always in a valid state after construction.
120
+ self._ta_env.reset(num_players=self.num_players)
121
+
122
  # ------------------------------------------------------------------
123
  # Environment interface
124
  # ------------------------------------------------------------------
125
+ def reset(
126
+ self,
127
+ seed: Optional[int] = None,
128
+ episode_id: Optional[str] = None,
129
+ **kwargs: Any,
130
+ ) -> TextArenaObservation:
131
  # TextArena observation wrappers (LLMObservationWrapper, etc.) accumulate
132
  # observations in self.full_observations across resets. Since we can't modify TextArena,
133
  # we need to manually clear this state to prevent history accumulation.
 
145
  for provider in self._reward_providers:
146
  provider.reset()
147
 
148
+ self._state.episode_id = episode_id if episode_id is not None else str(uuid4())
149
  self._state.step_count = 0
150
  self._state.turn = 0
151
  self._state.last_reward = 0.0
server/run_local.sh CHANGED
@@ -1,4 +1,4 @@
1
- export TEXTARENA_ENV_ID="Sudoku-v0"
2
  export TEXTARENA_NUM_PLAYERS=1
3
 
4
  # Run the server
 
1
+ export TEXTARENA_ENV_ID="Wordle-v0"
2
  export TEXTARENA_NUM_PLAYERS=1
3
 
4
  # Run the server
uv.lock CHANGED
@@ -301,11 +301,11 @@ wheels = [
301
 
302
  [[package]]
303
  name = "filelock"
304
- version = "3.20.2"
305
  source = { registry = "https://pypi.org/simple" }
306
- sdist = { url = "https://files.pythonhosted.org/packages/c1/e0/a75dbe4bca1e7d41307323dad5ea2efdd95408f74ab2de8bd7dba9b51a1a/filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64", size = 19510, upload-time = "2026-01-02T15:33:32.582Z" }
307
  wheels = [
308
- { url = "https://files.pythonhosted.org/packages/9a/30/ab407e2ec752aa541704ed8f93c11e2a5d92c168b8a755d818b74a3c5c2d/filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8", size = 16697, upload-time = "2026-01-02T15:33:31.133Z" },
309
  ]
310
 
311
  [[package]]
@@ -1206,11 +1206,11 @@ wheels = [
1206
 
1207
  [[package]]
1208
  name = "urllib3"
1209
- version = "2.6.2"
1210
  source = { registry = "https://pypi.org/simple" }
1211
- sdist = { url = "https://files.pythonhosted.org/packages/1e/24/a2a2ed9addd907787d7aa0355ba36a6cadf1768b934c652ea78acbd59dcd/urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797", size = 432930, upload-time = "2025-12-11T15:56:40.252Z" }
1212
  wheels = [
1213
- { url = "https://files.pythonhosted.org/packages/6d/b9/4095b668ea3678bf6a0af005527f39de12fb026516fb3df17495a733b7f8/urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd", size = 131182, upload-time = "2025-12-11T15:56:38.584Z" },
1214
  ]
1215
 
1216
  [[package]]
 
301
 
302
  [[package]]
303
  name = "filelock"
304
+ version = "3.20.3"
305
  source = { registry = "https://pypi.org/simple" }
306
+ sdist = { url = "https://files.pythonhosted.org/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485, upload-time = "2026-01-09T17:55:05.421Z" }
307
  wheels = [
308
+ { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701, upload-time = "2026-01-09T17:55:04.334Z" },
309
  ]
310
 
311
  [[package]]
 
1206
 
1207
  [[package]]
1208
  name = "urllib3"
1209
+ version = "2.6.3"
1210
  source = { registry = "https://pypi.org/simple" }
1211
+ sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
1212
  wheels = [
1213
+ { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
1214
  ]
1215
 
1216
  [[package]]