Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- Dockerfile +29 -0
- README.md +179 -3
- __init__.py +18 -0
- client.py +180 -0
- examples/__init__.py +0 -0
- examples/basic_usage.py +128 -0
- examples/openenv_training.py +134 -0
- models.py +78 -0
- openenv.yaml +6 -0
- outputs/.gitkeep +0 -0
- pyproject.toml +21 -0
- server/__init__.py +5 -0
- server/app.py +151 -0
- server/chess_environment.py +326 -0
- uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies (gcc for building any C extensions pulled in by pip)
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy the moonfish package and rl module
COPY . /app/

# Install dependencies.
# Version specifiers are quoted: an unquoted `>=` is interpreted by the shell
# as an output redirection, so the original command installed unpinned
# packages and left stray files named `=1.10.0`, `=0.100.0`, ... in the image.
RUN pip install --no-cache-dir \
    "chess>=1.10.0" \
    "fastapi>=0.100.0" \
    "uvicorn[standard]>=0.23.0" \
    "httpx>=0.24.0" \
    "pydantic>=2.0.0"

# Install moonfish from the local package
RUN pip install --no-cache-dir -e /app

# Expose port
EXPOSE 8000

# Run the server
ENV ENABLE_WEB_INTERFACE=true
CMD ["python", "-m", "uvicorn", "moonfish.rl.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
CHANGED
|
@@ -1,10 +1,186 @@
|
|
| 1 |
---
|
| 2 |
title: Moonfish Chess
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: blue
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Moonfish Chess
|
| 3 |
+
emoji: ♟️
|
| 4 |
+
colorFrom: gray
|
| 5 |
colorTo: blue
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
base_path: /web
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Chess OpenEnv
|
| 13 |
+
|
| 14 |
+
A chess environment for reinforcement learning, built on [moonfish](https://github.com/luccab/moonfish) and compatible with the [OpenEnv](https://github.com/meta-pytorch/OpenEnv) framework.
|
| 15 |
+
|
| 16 |
+
## Features
|
| 17 |
+
|
| 18 |
+
- **Full Chess Rules**: Legal move generation, checkmate/stalemate detection, draw conditions
|
| 19 |
+
- **Position Evaluation**: PeSTO evaluation function from moonfish for reward shaping
|
| 20 |
+
- **OpenEnv Compatible**: Standard `reset()`, `step()`, `state()` interface
|
| 21 |
+
- **Configurable Rewards**: Win/loss/draw payoffs, illegal move penalties, evaluation-based rewards
|
| 22 |
+
- **HTTP API**: FastAPI server for remote training and multi-agent setups
|
| 23 |
+
- **Containerized**: Docker support for reproducible deployments
|
| 24 |
+
|
| 25 |
+
## Quick Start
|
| 26 |
+
|
| 27 |
+
### Local Usage (No Server)
|
| 28 |
+
|
| 29 |
+
```python
|
| 30 |
+
from moonfish.rl import ChessEnvironment, ChessAction
|
| 31 |
+
|
| 32 |
+
# Create environment
|
| 33 |
+
env = ChessEnvironment()
|
| 34 |
+
|
| 35 |
+
# Start a new game
|
| 36 |
+
obs = env.reset()
|
| 37 |
+
print(f"Legal moves: {obs.legal_moves}")
|
| 38 |
+
|
| 39 |
+
# Make a move
|
| 40 |
+
action = ChessAction(move="e2e4")
|
| 41 |
+
obs, reward, done = env.step(action)
|
| 42 |
+
|
| 43 |
+
print(f"FEN: {obs.fen}")
|
| 44 |
+
print(f"Reward: {reward}, Done: {done}")
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
### Client-Server Usage
|
| 48 |
+
|
| 49 |
+
Start the server:
|
| 50 |
+
|
| 51 |
+
```bash
# Run from the repository root so the package-relative imports in
# server/app.py (e.g. `from ..models import ...`) resolve correctly
python -m uvicorn moonfish.rl.server.app:app --host 0.0.0.0 --port 8000
```
|
| 55 |
+
|
| 56 |
+
Connect with the client:
|
| 57 |
+
|
| 58 |
+
```python
|
| 59 |
+
from moonfish.rl import ChessEnvClient, ChessAction
|
| 60 |
+
|
| 61 |
+
client = ChessEnvClient("http://localhost:8000")
|
| 62 |
+
|
| 63 |
+
obs = client.reset()
|
| 64 |
+
result = client.step(ChessAction(move="e2e4"))
|
| 65 |
+
print(f"Reward: {result.reward}")
|
| 66 |
+
|
| 67 |
+
client.close()
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
## Data Models
|
| 71 |
+
|
| 72 |
+
### ChessAction
|
| 73 |
+
```python
|
| 74 |
+
@dataclass
|
| 75 |
+
class ChessAction:
|
| 76 |
+
move: str # UCI format: "e2e4", "e7e8q" (promotion)
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
### ChessObservation
|
| 80 |
+
```python
|
| 81 |
+
@dataclass
|
| 82 |
+
class ChessObservation:
|
| 83 |
+
fen: str # Board state in FEN notation
|
| 84 |
+
legal_moves: List[str] # Available moves in UCI format
|
| 85 |
+
is_check: bool # Current player in check
|
| 86 |
+
done: bool # Game over
|
| 87 |
+
reward: Optional[float] # Terminal reward
|
| 88 |
+
result: Optional[str] # "1-0", "0-1", "1/2-1/2"
|
| 89 |
+
metadata: Dict[str, Any] # Evaluation, material, etc.
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
### ChessState
|
| 93 |
+
```python
|
| 94 |
+
@dataclass
|
| 95 |
+
class ChessState:
|
| 96 |
+
episode_id: str # Unique game identifier
|
| 97 |
+
step_count: int # Half-moves played
|
| 98 |
+
current_player: str # "white" or "black"
|
| 99 |
+
fen: str # Current position
|
| 100 |
+
move_history: List[str] # All moves in UCI format
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
## Reward Configuration
|
| 104 |
+
|
| 105 |
+
```python
|
| 106 |
+
from moonfish.rl import ChessEnvironment, RewardConfig
|
| 107 |
+
|
| 108 |
+
config = RewardConfig(
|
| 109 |
+
win=1.0, # Reward for winning
|
| 110 |
+
loss=-1.0, # Penalty for losing
|
| 111 |
+
draw=0.0, # Reward for draw
|
| 112 |
+
illegal_move=-0.1, # Penalty for illegal moves
|
| 113 |
+
use_evaluation=True, # Enable intermediate rewards
|
| 114 |
+
evaluation_scale=0.0001, # Scale for eval-based rewards
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
env = ChessEnvironment(reward_config=config)
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
## Docker
|
| 121 |
+
|
| 122 |
+
Build and run:
|
| 123 |
+
|
| 124 |
+
```bash
|
| 125 |
+
docker build -t chess-openenv .
|
| 126 |
+
docker run -p 8000:8000 chess-openenv
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
## Integration with RL Frameworks
|
| 130 |
+
|
| 131 |
+
### With TorchRL
|
| 132 |
+
|
| 133 |
+
```python
|
| 134 |
+
from moonfish.rl import ChessEnvironment, ChessAction
|
| 135 |
+
|
| 136 |
+
class ChessTorchRLWrapper:
|
| 137 |
+
def __init__(self):
|
| 138 |
+
self.env = ChessEnvironment()
|
| 139 |
+
|
| 140 |
+
def reset(self):
|
| 141 |
+
obs = self.env.reset()
|
| 142 |
+
return self._obs_to_tensor(obs)
|
| 143 |
+
|
| 144 |
+
def step(self, action_idx):
|
| 145 |
+
move = self._idx_to_move(action_idx)
|
| 146 |
+
obs, reward, done = self.env.step(ChessAction(move=move))
|
| 147 |
+
return self._obs_to_tensor(obs), reward, done
|
| 148 |
+
```
|
| 149 |
+
|
| 150 |
+
### With OpenEnv Training Loop
|
| 151 |
+
|
| 152 |
+
```python
|
| 153 |
+
from moonfish.rl import make_env, ChessAction
|
| 154 |
+
import random
|
| 155 |
+
|
| 156 |
+
client = make_env("http://localhost:8000")
|
| 157 |
+
|
| 158 |
+
for episode in range(100):
|
| 159 |
+
obs = client.reset()
|
| 160 |
+
episode_reward = 0
|
| 161 |
+
|
| 162 |
+
while not obs.done:
|
| 163 |
+
# Your policy here (random for demo)
|
| 164 |
+
move = random.choice(obs.legal_moves)
|
| 165 |
+
result = client.step(ChessAction(move=move))
|
| 166 |
+
obs = result.observation
|
| 167 |
+
episode_reward += result.reward
|
| 168 |
+
|
| 169 |
+
print(f"Episode {episode}: reward={episode_reward}")
|
| 170 |
+
|
| 171 |
+
client.close()
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
## API Endpoints
|
| 175 |
+
|
| 176 |
+
| Endpoint | Method | Description |
|
| 177 |
+
|----------|--------|-------------|
|
| 178 |
+
| `/health` | GET | Health check |
|
| 179 |
+
| `/metadata` | GET | Environment configuration |
|
| 180 |
+
| `/reset` | POST | Start new episode |
|
| 181 |
+
| `/step` | POST | Execute a move |
|
| 182 |
+
| `/state` | GET | Get episode metadata |
|
| 183 |
+
|
| 184 |
+
## License
|
| 185 |
+
|
| 186 |
+
MIT - See the moonfish repository for full license details.
|
__init__.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Chess OpenEnv - A chess environment for reinforcement learning."""
|
| 2 |
+
|
| 3 |
+
from .models import ChessAction, ChessObservation, ChessState, RewardConfig
|
| 4 |
+
from .client import ChessEnvClient, StepResult, make_env
|
| 5 |
+
from .server.chess_environment import ChessEnvironment
|
| 6 |
+
|
| 7 |
+
__all__ = [
|
| 8 |
+
"ChessAction",
|
| 9 |
+
"ChessObservation",
|
| 10 |
+
"ChessState",
|
| 11 |
+
"RewardConfig",
|
| 12 |
+
"ChessEnvClient",
|
| 13 |
+
"StepResult",
|
| 14 |
+
"make_env",
|
| 15 |
+
"ChessEnvironment",
|
| 16 |
+
]
|
| 17 |
+
|
| 18 |
+
__version__ = "1.0.0"
|
client.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Client for the Chess OpenEnv environment."""
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Any, Dict, List, Optional
|
| 5 |
+
|
| 6 |
+
import httpx
|
| 7 |
+
|
| 8 |
+
from .models import ChessAction, ChessObservation, ChessState
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@dataclass
class StepResult:
    """Bundle returned by ``ChessEnvClient.step()``.

    Attributes:
        observation: The board observation after the move was applied.
        reward: Scalar reward for this transition.
        done: True when the episode ended on this step.
    """
    observation: ChessObservation
    reward: float
    done: bool
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class ChessEnvClient:
    """HTTP client for a remote Chess OpenEnv server.

    Wraps the server's REST endpoints (/reset, /step, /state, /metadata,
    /health) behind a small typed interface suitable for RL training loops.

    Example:
        client = ChessEnvClient("http://localhost:8000")
        obs = client.reset()
        print(f"Legal moves: {obs.legal_moves}")

        result = client.step(ChessAction(move="e2e4"))
        print(f"Reward: {result.reward}, Done: {result.done}")

        state = client.state()
        print(f"Move count: {state.step_count}")

        client.close()
    """

    def __init__(self, base_url: str = "http://localhost:8000", timeout: float = 30.0):
        """Bind the client to *base_url* with the given request *timeout* (seconds)."""
        self.base_url = base_url.rstrip("/")
        self._client = httpx.Client(timeout=timeout)

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        fen: Optional[str] = None,
    ) -> ChessObservation:
        """Start a new episode and return the initial board observation.

        Args:
            seed: Random seed forwarded to the server (optional).
            episode_id: Unique identifier for the new episode (optional).
            fen: Starting position in FEN notation (optional).
        """
        # Only send the fields the caller actually supplied.
        candidates = {"seed": seed, "episode_id": episode_id, "fen": fen}
        payload = {key: value for key, value in candidates.items() if value is not None}

        response = self._client.post(f"{self.base_url}/reset", json=payload)
        response.raise_for_status()
        return self._parse_observation(response.json())

    def step(self, action: ChessAction) -> StepResult:
        """Execute *action* (a UCI move) and return the resulting transition.

        Returns:
            StepResult with the new observation, reward, and done flag.
        """
        response = self._client.post(
            f"{self.base_url}/step", json={"move": action.move}
        )
        response.raise_for_status()
        data = response.json()

        return StepResult(
            observation=self._parse_observation(data["observation"]),
            reward=data["reward"],
            done=data["done"],
        )

    def state(self) -> ChessState:
        """Fetch the current episode state (id, move count, side to move, FEN, history)."""
        response = self._client.get(f"{self.base_url}/state")
        response.raise_for_status()
        data = response.json()

        return ChessState(
            episode_id=data["episode_id"],
            step_count=data["step_count"],
            current_player=data["current_player"],
            fen=data["fen"],
            move_history=data.get("move_history", []),
        )

    def metadata(self) -> Dict[str, Any]:
        """Return the server's environment configuration as a plain dict."""
        response = self._client.get(f"{self.base_url}/metadata")
        response.raise_for_status()
        return response.json()

    def health(self) -> bool:
        """Return True if the server answers /health with HTTP 200, else False."""
        try:
            return self._client.get(f"{self.base_url}/health").status_code == 200
        except Exception:
            # Any transport failure (connection refused, timeout, ...) counts
            # as unhealthy rather than propagating to the caller.
            return False

    def close(self) -> None:
        """Release the underlying HTTP connection pool."""
        self._client.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def _parse_observation(self, data: Dict[str, Any]) -> ChessObservation:
        """Build a ChessObservation from a JSON payload, tolerating absent optional keys."""
        return ChessObservation(
            fen=data["fen"],
            legal_moves=data["legal_moves"],
            is_check=data.get("is_check", False),
            done=data.get("done", False),
            reward=data.get("reward"),
            result=data.get("result"),
            metadata=data.get("metadata", {}),
        )
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
# Convenience function for quick usage
def make_env(base_url: str = "http://localhost:8000") -> ChessEnvClient:
    """Factory shorthand: return a ChessEnvClient pointed at *base_url*.

    Args:
        base_url: URL of the chess environment server.

    Returns:
        A ready-to-use ChessEnvClient.
    """
    return ChessEnvClient(base_url)
|
examples/__init__.py
ADDED
|
File without changes
|
examples/basic_usage.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Basic usage example for the Chess OpenEnv environment.
|
| 3 |
+
|
| 4 |
+
This example shows how to use the chess environment both locally
|
| 5 |
+
(without a server) and via the HTTP client.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import random
|
| 9 |
+
|
| 10 |
+
from moonfish.rl import ChessAction, ChessEnvironment, RewardConfig
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def play_random_game():
    """Play one complete game with uniformly random legal moves."""
    print("=== Playing a random game ===\n")

    # Create environment and start a fresh episode with a fixed id.
    env = ChessEnvironment()
    obs = env.reset(episode_id="random_game_001")

    print(f"Initial position: {obs.fen}")
    print(f"Legal moves: {len(obs.legal_moves)} available")
    print()

    move_count = 0
    total_reward = 0.0

    while not obs.done:
        # Sample a legal move uniformly at random and play it.
        chosen = random.choice(obs.legal_moves)
        obs, reward, done = env.step(ChessAction(move=chosen))
        total_reward += reward
        move_count += 1

        # Print the first few moves and the final one; stay quiet in between.
        if move_count <= 5 or done:
            print(f"Move {move_count}: {chosen}")
            print(f" FEN: {obs.fen}")
            print(f" Check: {obs.is_check}, Reward: {reward}")
            if move_count == 5 and not done:
                print(" ... (continuing)")
            print()

    print(f"\nGame finished after {move_count} moves")
    print(f"Result: {obs.result}")
    print(f"Total reward: {total_reward}")

    # Inspect the final episode metadata.
    state = env.state
    print(f"Episode ID: {state.episode_id}")
    print(f"Move history: {state.move_history[:10]}...")

    env.close()
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def play_specific_opening():
    """Walk through the first moves of the Italian Game opening."""
    print("\n=== Playing the Italian Game opening ===\n")

    env = ChessEnvironment()
    obs = env.reset()

    # 1.e4 e5 2.Nf3 Nc6 3.Bc4 in UCI notation.
    opening_moves = ["e2e4", "e7e5", "g1f3", "b8c6", "f1c4"]

    for number, uci in enumerate(opening_moves, start=1):
        obs, _reward, _done = env.step(ChessAction(move=uci))
        print(f"{number}. {uci} -> Check: {obs.is_check}")

    print(f"\nPosition after opening: {obs.fen}")
    print(f"Legal moves for Black: {len(obs.legal_moves)}")
    print(f"Material: {obs.metadata.get('material', {})}")

    env.close()
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def demonstrate_illegal_move():
    """Show how the environment reacts to an illegal move.

    Submits a rule-breaking move (a pawn jumping three squares) and
    prints the penalty reward, the error recorded in the observation
    metadata, and the done flag (the game continues after the attempt).
    """
    print("\n=== Handling illegal moves ===\n")

    env = ChessEnvironment()
    env.reset()

    # Try an illegal move: a pawn cannot jump from e2 to e5.
    # Bound to a name so the printout below cannot drift out of sync
    # (the original used an f-string with no placeholders here).
    bad_move = "e2e5"
    obs, reward, done = env.step(ChessAction(move=bad_move))

    print(f"Attempted illegal move: {bad_move}")
    print(f"Reward: {reward}")  # Should be negative (illegal-move penalty)
    print(f"Error: {obs.metadata.get('error', 'None')}")
    print(f"Done: {done}")  # Game continues

    env.close()
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def with_evaluation_rewards():
    """Demonstrate intermediate rewards derived from position evaluation."""
    print("\n=== Evaluation-based rewards ===\n")

    # Centipawn evaluations are large; scale them down into reward range.
    config = RewardConfig(use_evaluation=True, evaluation_scale=0.0001)

    env = ChessEnvironment(reward_config=config)
    env.reset()

    # A short line where White wins a pawn, shifting the evaluation.
    for move in ["e2e4", "d7d5", "e4d5"]:
        obs, reward, done = env.step(ChessAction(move=move))
        eval_score = obs.metadata.get("evaluation", 0)
        print(f"Move: {move}, Reward: {reward:.4f}, Eval: {eval_score:.1f}")

    env.close()
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
if __name__ == "__main__":
|
| 125 |
+
play_random_game()
|
| 126 |
+
play_specific_opening()
|
| 127 |
+
demonstrate_illegal_move()
|
| 128 |
+
with_evaluation_rewards()
|
examples/openenv_training.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
OpenEnv Training Example
|
| 3 |
+
|
| 4 |
+
This example shows how to use the chess environment with the OpenEnv
|
| 5 |
+
client-server pattern, which is useful for:
|
| 6 |
+
- Distributed training across machines
|
| 7 |
+
- Isolated environment execution
|
| 8 |
+
- Integration with OpenEnv-compatible training frameworks
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
# Terminal 1: Start the server
|
| 12 |
+
cd moonfish/rl
|
| 13 |
+
python -m uvicorn server.app:app --host 0.0.0.0 --port 8000
|
| 14 |
+
|
| 15 |
+
# Terminal 2: Run this training script
|
| 16 |
+
python examples/openenv_training.py
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
import random
|
| 20 |
+
from moonfish.rl import ChessEnvClient, ChessAction, make_env
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def random_policy(legal_moves: list[str]) -> str:
    """Demonstration policy: pick one of *legal_moves* uniformly at random."""
    return random.choice(legal_moves)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def train_with_remote_env():
    """Run a short random-policy training loop against a remote server.

    Demonstrates the OpenEnv client/server pattern, useful when:
    - the environment runs on a different machine,
    - environment isolation (sandboxing) is needed, or
    - an OpenEnv-compatible training framework is in use.
    """
    # Connect to the environment server.
    # For local testing, start the server first:
    #   python -m uvicorn moonfish.rl.server.app:app --port 8000
    client = make_env("http://localhost:8000")

    # Bail out early with instructions if the server is unreachable.
    if not client.health():
        print("Server not running. Start it with:")
        print(" python -m uvicorn moonfish.rl.server.app:app --port 8000")
        return

    print("Connected to chess environment server")
    print(f"Metadata: {client.metadata()}")
    print()

    num_episodes = 5

    for episode in range(num_episodes):
        obs = client.reset()
        episode_reward = 0.0

        print(f"Episode {episode + 1}")

        while not obs.done:
            # Pick an action with the (random) policy and send it.
            action = ChessAction(move=random_policy(obs.legal_moves))
            step_result = client.step(action)
            obs = step_result.observation
            episode_reward += step_result.reward

            # Safety limit: abandon runaway games.
            if client.state().step_count > 200:
                print(" (truncated at 200 moves)")
                break

        print(f" Moves: {client.state().step_count}, "
              f"Result: {obs.result or 'ongoing'}, "
              f"Reward: {episode_reward:.2f}")

    # Cleanup
    client.close()
    print("\nTraining complete!")
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def train_with_local_env():
    """Run a short random-policy loop against an in-process environment.

    Simpler and faster than the HTTP client when everything runs on one
    machine — no server required.
    """
    from moonfish.rl import ChessEnvironment

    env = ChessEnvironment(opponent="random")

    print("Training with local environment (random opponent)")
    print()

    num_episodes = 5

    for episode in range(num_episodes):
        obs = env.reset()
        episode_reward = 0.0

        while not obs.done:
            obs, reward, done = env.step(
                ChessAction(move=random_policy(obs.legal_moves))
            )
            episode_reward += reward

            # Safety limit: abandon runaway games.
            if env.state.step_count > 200:
                break

        print(f"Episode {episode + 1}: "
              f"Moves={env.state.step_count}, "
              f"Result={obs.result or 'ongoing'}, "
              f"Reward={episode_reward:.2f}")

    env.close()
    print("\nTraining complete!")
| 121 |
+
|
| 122 |
+
|
| 123 |
+
if __name__ == "__main__":
|
| 124 |
+
import sys
|
| 125 |
+
|
| 126 |
+
if "--remote" in sys.argv:
|
| 127 |
+
print("=== Remote Environment (OpenEnv HTTP Client) ===\n")
|
| 128 |
+
train_with_remote_env()
|
| 129 |
+
else:
|
| 130 |
+
print("=== Local Environment ===\n")
|
| 131 |
+
train_with_local_env()
|
| 132 |
+
print("\nTo test with HTTP client, run:")
|
| 133 |
+
print(" 1. Start server: python -m uvicorn moonfish.rl.server.app:app --port 8000")
|
| 134 |
+
print(" 2. Run: python examples/openenv_training.py --remote")
|
models.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data models for the Chess OpenEnv environment."""
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass, field
|
| 4 |
+
from typing import Any, Dict, List, Optional, Union
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@dataclass
class ChessAction:
    """A single chess move submitted to the environment.

    Attributes:
        move: Move in UCI notation, e.g. "e2e4", or "e7e8q" for a promotion.
    """
    move: str
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@dataclass
class ChessObservation:
    """Observable state of the chess environment after a reset or step.

    Attributes:
        fen: Board position in FEN notation.
        legal_moves: Legal moves available to the side to move, UCI format.
        is_check: True when the current player is in check.
        done: True once the episode has ended.
        reward: Terminal reward (1.0 win / -1.0 loss / 0.0 draw); None while
            the game is still in progress.
        result: Result string when the game is over ("1-0", "0-1", "1/2-1/2").
        metadata: Extra per-position information (evaluation, material, ...).
    """
    fen: str
    legal_moves: List[str]
    is_check: bool = False
    done: bool = False
    reward: Optional[float] = None
    result: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@dataclass
class ChessState:
    """Episode-level bookkeeping for the chess environment.

    Attributes:
        episode_id: Unique identifier of the current episode.
        step_count: Half-moves played so far in this episode.
        current_player: Side to move, "white" or "black".
        fen: Current position in FEN notation.
        move_history: Every move played so far, in UCI format.
    """
    episode_id: str
    step_count: int
    current_player: str
    fen: str
    move_history: List[str] = field(default_factory=list)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@dataclass
class RewardConfig:
    """Reward-shaping knobs for the chess environment.

    Attributes:
        win: Reward paid for winning the game.
        loss: Reward (penalty) for losing the game.
        draw: Reward for a drawn game.
        illegal_move: Penalty applied when an illegal move is attempted.
        use_evaluation: When True, position evaluation contributes
            intermediate (non-terminal) rewards.
        evaluation_scale: Multiplier applied to evaluation-based rewards.
    """
    win: float = 1.0
    loss: float = -1.0
    draw: float = 0.0
    illegal_move: float = -0.1
    use_evaluation: bool = False
    evaluation_scale: float = 0.001
|
openenv.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# OpenEnv environment manifest for the moonfish chess environment.
spec_version: 1
name: moonfish_chess
type: space
runtime: fastapi
# Import path of the ASGI application object, as "module:attribute".
app: server.app:app
port: 8000
|
outputs/.gitkeep
ADDED
|
File without changes
|
pyproject.toml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
name = "moonfish-chess-env"
version = "1.0.0"
description = "Chess RL environment using moonfish engine - OpenEnv compatible"
requires-python = ">=3.10"

dependencies = [
    "chess>=1.10.0",
    "fastapi>=0.100.0",
    "uvicorn[standard]>=0.23.0",
    "httpx>=0.24.0",
    "pydantic>=2.0.0",
    "openenv>=0.1.0",
]

[project.scripts]
# NOTE(review): this entry point expects a callable `main` in server/app.py;
# the visible part of that module only defines the FastAPI `app` object —
# confirm `main` exists before relying on this script.
server = "server.app:main"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
|
server/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Chess OpenEnv server module."""
|
| 2 |
+
|
| 3 |
+
from .chess_environment import ChessEnvironment
|
| 4 |
+
|
| 5 |
+
__all__ = ["ChessEnvironment"]
|
server/app.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI server for the Chess OpenEnv environment."""
|
| 2 |
+
|
| 3 |
+
from typing import Any, Dict, Optional
|
| 4 |
+
from dataclasses import asdict
|
| 5 |
+
|
| 6 |
+
from fastapi import FastAPI, HTTPException
|
| 7 |
+
from pydantic import BaseModel
|
| 8 |
+
|
| 9 |
+
from ..models import ChessAction, RewardConfig
|
| 10 |
+
from .chess_environment import ChessEnvironment
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Pydantic models for API requests/responses
|
| 14 |
+
class ResetRequest(BaseModel):
    """Request body for POST /reset."""

    # Optional RNG seed (currently unused by the environment).
    seed: Optional[int] = None
    # Caller-supplied episode identifier; the environment generates one if omitted.
    episode_id: Optional[str] = None
    # Optional starting position in FEN notation (standard start if omitted).
    fen: Optional[str] = None
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class StepRequest(BaseModel):
    """Request body for POST /step."""

    # Move in UCI notation, e.g. "e2e4".
    move: str
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class ObservationResponse(BaseModel):
    """Serialized view of a chess observation returned by /reset and /step."""

    # Current position in FEN notation.
    fen: str
    # All legal moves in the current position, in UCI notation.
    legal_moves: list[str]
    # True when the side to move is in check.
    is_check: bool = False
    # True once the episode has ended.
    done: bool = False
    # Reward attached to the observation; populated on terminal observations.
    reward: Optional[float] = None
    # PGN-style result string (e.g. "1-0", "0-1", "1/2-1/2") once the game is over.
    result: Optional[str] = None
    # Extra info (material, phase, optional evaluation, error messages, ...).
    # NOTE: a mutable {} default is safe here because pydantic copies field
    # defaults per instance, unlike plain Python function defaults.
    metadata: Dict[str, Any] = {}
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class StepResponse(BaseModel):
    """Response body for POST /step: observation plus scalar RL signals."""

    # Resulting board observation after the move (and any opponent reply).
    observation: ObservationResponse
    # Reward for this transition.
    reward: float
    # True when the episode ended on this step.
    done: bool
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class StateResponse(BaseModel):
    """Response body for GET /state: current episode bookkeeping."""

    # Unique identifier of the running episode.
    episode_id: str
    # Number of half-moves played so far (both sides).
    step_count: int
    # Side to move: "white" or "black".
    current_player: str
    # Current position in FEN notation.
    fen: str
    # All moves played so far, in UCI notation.
    move_history: list[str]
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# Create FastAPI app
|
| 49 |
+
app = FastAPI(
|
| 50 |
+
title="Chess OpenEnv",
|
| 51 |
+
description="Chess environment for reinforcement learning using moonfish",
|
| 52 |
+
version="1.0.0",
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
# Global environment instance (for single-player mode)
|
| 56 |
+
# For multi-player, you'd want a session manager
|
| 57 |
+
_env: Optional[ChessEnvironment] = None
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def get_env() -> ChessEnvironment:
    """Return the process-wide environment, creating it on first use."""
    global _env
    if _env is not None:
        return _env
    _env = ChessEnvironment()
    return _env
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
@app.get("/health")
|
| 69 |
+
def health():
|
| 70 |
+
"""Health check endpoint."""
|
| 71 |
+
return {"status": "ok"}
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
@app.get("/metadata")
|
| 75 |
+
def metadata():
|
| 76 |
+
"""Get environment metadata."""
|
| 77 |
+
return get_env().get_metadata()
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
@app.post("/reset", response_model=ObservationResponse)
|
| 81 |
+
def reset(request: ResetRequest):
|
| 82 |
+
"""Reset the environment and start a new episode."""
|
| 83 |
+
env = get_env()
|
| 84 |
+
obs = env.reset(
|
| 85 |
+
seed=request.seed,
|
| 86 |
+
episode_id=request.episode_id,
|
| 87 |
+
fen=request.fen,
|
| 88 |
+
)
|
| 89 |
+
return ObservationResponse(
|
| 90 |
+
fen=obs.fen,
|
| 91 |
+
legal_moves=obs.legal_moves,
|
| 92 |
+
is_check=obs.is_check,
|
| 93 |
+
done=obs.done,
|
| 94 |
+
reward=obs.reward,
|
| 95 |
+
result=obs.result,
|
| 96 |
+
metadata=obs.metadata,
|
| 97 |
+
)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
@app.post("/step", response_model=StepResponse)
|
| 101 |
+
def step(request: StepRequest):
|
| 102 |
+
"""Execute a move and return the result."""
|
| 103 |
+
env = get_env()
|
| 104 |
+
|
| 105 |
+
try:
|
| 106 |
+
action = ChessAction(move=request.move)
|
| 107 |
+
obs, reward, done = env.step(action)
|
| 108 |
+
except RuntimeError as e:
|
| 109 |
+
raise HTTPException(status_code=400, detail=str(e))
|
| 110 |
+
|
| 111 |
+
return StepResponse(
|
| 112 |
+
observation=ObservationResponse(
|
| 113 |
+
fen=obs.fen,
|
| 114 |
+
legal_moves=obs.legal_moves,
|
| 115 |
+
is_check=obs.is_check,
|
| 116 |
+
done=obs.done,
|
| 117 |
+
reward=obs.reward,
|
| 118 |
+
result=obs.result,
|
| 119 |
+
metadata=obs.metadata,
|
| 120 |
+
),
|
| 121 |
+
reward=reward,
|
| 122 |
+
done=done,
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
@app.get("/state", response_model=StateResponse)
|
| 127 |
+
def state():
|
| 128 |
+
"""Get current episode state."""
|
| 129 |
+
env = get_env()
|
| 130 |
+
try:
|
| 131 |
+
s = env.state
|
| 132 |
+
except RuntimeError as e:
|
| 133 |
+
raise HTTPException(status_code=400, detail=str(e))
|
| 134 |
+
|
| 135 |
+
return StateResponse(
|
| 136 |
+
episode_id=s.episode_id,
|
| 137 |
+
step_count=s.step_count,
|
| 138 |
+
current_player=s.current_player,
|
| 139 |
+
fen=s.fen,
|
| 140 |
+
move_history=s.move_history,
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def main():
    """Run the API under uvicorn, listening on all interfaces, port 8000."""
    # Imported lazily so importing this module never requires uvicorn.
    import uvicorn

    host, port = "0.0.0.0", 8000
    uvicorn.run(app, host=host, port=port)
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
if __name__ == "__main__":
|
| 151 |
+
main()
|
server/chess_environment.py
ADDED
|
@@ -0,0 +1,326 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Chess environment for OpenEnv using moonfish."""
|
| 2 |
+
|
| 3 |
+
import random
|
| 4 |
+
import uuid
|
| 5 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 6 |
+
|
| 7 |
+
import chess
|
| 8 |
+
|
| 9 |
+
from moonfish.psqt import board_evaluation, MG_PIECE_VALUES, count_pieces, get_phase
|
| 10 |
+
from moonfish.lib import search_move
|
| 11 |
+
from ..models import ChessAction, ChessObservation, ChessState, RewardConfig
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ChessEnvironment:
|
| 15 |
+
"""
|
| 16 |
+
Chess environment implementing the OpenEnv interface.
|
| 17 |
+
|
| 18 |
+
Uses python-chess for game logic and moonfish for position evaluation.
|
| 19 |
+
Designed for RL training where an agent plays as one color against
|
| 20 |
+
an opponent (which can be random, moonfish engine, or self-play).
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
def __init__(
    self,
    reward_config: Optional[RewardConfig] = None,
    max_moves: int = 500,
    agent_color: Optional[bool] = None,  # None = alternate, True = White, False = Black
    opponent: Optional[str] = None,  # None = self-play, "moonfish" = moonfish engine, "random" = random
    opponent_depth: int = 2,  # Search depth for moonfish opponent
):
    """
    Initialize the chess environment.

    Args:
        reward_config: Configuration for reward shaping (defaults to RewardConfig()).
        max_moves: Maximum half-moves before the episode is forced to a draw
            (prevents infinite games).
        agent_color: Which color the RL agent plays (None = alternates each episode).
        opponent: Opponent type - None (self-play), "moonfish", or "random".
        opponent_depth: Search depth when using moonfish as opponent.
    """
    # NOTE: `or` would also replace a falsy config, but dataclass instances
    # are always truthy, so only None actually triggers the default here.
    self.reward_config = reward_config or RewardConfig()
    self.max_moves = max_moves
    # The configured preference; the effective color for the current episode
    # is decided in reset() and stored in self._agent_color.
    self.agent_color_setting = agent_color
    self.opponent = opponent
    self.opponent_depth = opponent_depth

    # Will be set on reset
    self._board: Optional[chess.Board] = None   # live python-chess board for the episode
    self._state: Optional[ChessState] = None    # episode bookkeeping (id, counters, history)
    self._agent_color: bool = chess.WHITE       # effective agent color for this episode
|
| 51 |
+
|
| 52 |
+
def reset(
    self,
    seed: Optional[int] = None,
    episode_id: Optional[str] = None,
    fen: Optional[str] = None,
    **kwargs
) -> ChessObservation:
    """
    Initialize a new chess game episode.

    Args:
        seed: Random seed (unused for now, chess is deterministic)
        episode_id: Unique identifier for this episode; also drives color
            alternation when agent_color was not fixed at construction
        fen: Optional starting position in FEN notation

    Returns:
        Initial observation of the board state (agent to move)
    """
    # Create new board, optionally from a custom FEN position.
    if fen:
        self._board = chess.Board(fen)
    else:
        self._board = chess.Board()

    # Determine agent color for this episode.
    if self.agent_color_setting is None:
        if episode_id:
            # Bugfix: built-in hash() on str is salted per process
            # (PYTHONHASHSEED), so `hash(episode_id) % 2` assigned colors
            # non-reproducibly across runs. crc32 is a stable checksum, so
            # the same episode_id always maps to the same color.
            import zlib

            self._agent_color = zlib.crc32(episode_id.encode("utf-8")) % 2 == 0
        else:
            self._agent_color = chess.WHITE
    else:
        self._agent_color = self.agent_color_setting

    # Initialize episode bookkeeping.
    self._state = ChessState(
        episode_id=episode_id or uuid.uuid4().hex,
        step_count=0,
        current_player="white" if self._board.turn else "black",
        fen=self._board.fen(),
        move_history=[],
    )

    # If agent plays Black and an opponent is configured, the opponent moves
    # first so the returned observation already has the agent to move.
    if self.opponent is not None and self._agent_color == chess.BLACK:
        self._make_opponent_move()

    return self._get_observation()
|
| 100 |
+
|
| 101 |
+
def step(
    self,
    action: ChessAction,
    timeout_s: Optional[float] = None,
    **kwargs
) -> Tuple[ChessObservation, float, bool]:
    """
    Execute a chess move and return the resulting state.

    Args:
        action: The move to make in UCI format (e.g., "e2e4")
        timeout_s: Unused timeout parameter

    Returns:
        Tuple of (observation, reward, done)

    Raises:
        RuntimeError: If called before reset().
    """
    if self._board is None or self._state is None:
        raise RuntimeError("Environment not initialized. Call reset() first.")

    # Parse the move (UCI text -> chess.Move).
    try:
        move = chess.Move.from_uci(action.move)
    except ValueError:
        # Malformed UCI string: penalize without advancing the game.
        return self._handle_illegal_move(f"Invalid move format: {action.move}")

    # Check legality in the current position.
    if move not in self._board.legal_moves:
        return self._handle_illegal_move(f"Illegal move: {action.move}")

    # Execute the agent's move and update bookkeeping.
    self._board.push(move)
    self._state.step_count += 1
    self._state.move_history.append(action.move)
    self._state.current_player = "white" if self._board.turn else "black"
    self._state.fen = self._board.fen()

    # Reward for the agent's move, and terminal check.
    reward, done = self._calculate_reward_and_done()

    # If the game is not over and an opponent is configured, let it reply.
    if not done and self.opponent is not None:
        self._make_opponent_move()
        opp_reward, done = self._calculate_reward_and_done()
        # Bugfix: _calculate_reward_and_done() already scores from the
        # agent's perspective (it compares the winner to self._agent_color),
        # so a terminal reward after the opponent's reply is added as-is.
        # The previous `reward += -opp_reward` inverted win/loss here — the
        # agent received +win when the opponent delivered checkmate.
        if done:
            reward += opp_reward

    observation = self._get_observation(done=done, reward=reward if done else None)

    return observation, reward, done
|
| 152 |
+
|
| 153 |
+
@property
def state(self) -> ChessState:
    """Current episode state; raises RuntimeError before the first reset()."""
    current = self._state
    if current is None:
        raise RuntimeError("Environment not initialized. Call reset() first.")
    return current
|
| 159 |
+
|
| 160 |
+
def close(self) -> None:
    """Drop per-episode resources; reset() must be called before reuse."""
    self._state = None
    self._board = None
|
| 164 |
+
|
| 165 |
+
def get_metadata(self) -> Dict[str, Any]:
    """Describe the environment: name, version, move limit and reward settings."""
    cfg = self.reward_config
    reward_info = {
        "win": cfg.win,
        "loss": cfg.loss,
        "draw": cfg.draw,
        "illegal_move": cfg.illegal_move,
        "use_evaluation": cfg.use_evaluation,
        "evaluation_scale": cfg.evaluation_scale,
    }
    return {
        "name": "chess",
        "version": "1.0.0",
        "max_moves": self.max_moves,
        "reward_config": reward_info,
    }
|
| 180 |
+
|
| 181 |
+
def _get_observation(
    self,
    done: bool = False,
    reward: Optional[float] = None,
    result: Optional[str] = None,
    error: Optional[str] = None,
) -> ChessObservation:
    """Snapshot the current board as a ChessObservation."""
    board = self._board
    assert board is not None

    info: Dict[str, Any] = {}

    # Optional moonfish evaluation of the position.
    if self.reward_config.use_evaluation:
        info["evaluation"] = board_evaluation(board)

    # Material totals, game phase, and move counters.
    # NOTE(review): phase scale (0 = opening ... endgame) per moonfish's
    # get_phase — confirm against moonfish.psqt.
    info.update(
        material=self._get_material_count(),
        phase=get_phase(board),
        fullmove_number=board.fullmove_number,
        halfmove_clock=board.halfmove_clock,
    )

    if error:
        info["error"] = error

    # Fill in the result string when the game has ended and none was given.
    outcome = result
    if done and outcome is None:
        outcome = self._get_result_string()

    return ChessObservation(
        fen=board.fen(),
        legal_moves=[m.uci() for m in board.legal_moves],
        is_check=board.is_check(),
        done=done,
        reward=reward,
        result=outcome,
        metadata=info,
    )
|
| 223 |
+
|
| 224 |
+
def _calculate_reward_and_done(self) -> Tuple[float, bool]:
    """Calculate reward (from the agent's perspective) and whether the episode is done."""
    assert self._board is not None

    # Check for game end
    if self._board.is_checkmate():
        # The side to move is checkmated, so the previous mover won
        winner = not self._board.turn
        if winner == self._agent_color:
            return self.reward_config.win, True
        else:
            return self.reward_config.loss, True

    if self._board.is_stalemate():
        return self.reward_config.draw, True

    if self._board.is_insufficient_material():
        return self.reward_config.draw, True

    # Fifty-move rule (100 half-moves without capture or pawn move).
    if self._board.is_fifty_moves():
        return self.reward_config.draw, True

    # Threefold repetition.
    if self._board.is_repetition(3):
        return self.reward_config.draw, True

    # Check move limit (step_count counts half-moves by both sides).
    if self._state and self._state.step_count >= self.max_moves:
        return self.reward_config.draw, True

    # Game continues
    reward = 0.0

    # Optional: Add evaluation-based intermediate rewards
    if self.reward_config.use_evaluation:
        eval_score = board_evaluation(self._board)
        # Normalize evaluation to agent's perspective.
        # NOTE(review): this assumes board_evaluation() scores relative to
        # the side to move — confirm against moonfish.psqt. If it is
        # White-relative instead, the sign flip should key on
        # self._agent_color rather than on board.turn.
        if self._board.turn != self._agent_color:
            eval_score = -eval_score
        reward = eval_score * self.reward_config.evaluation_scale

    return reward, False
|
| 265 |
+
|
| 266 |
+
def _handle_illegal_move(self, error_msg: str) -> Tuple[ChessObservation, float, bool]:
    """Penalize an illegal move attempt without advancing the game."""
    obs = self._get_observation(done=False, error=error_msg)
    penalty = self.reward_config.illegal_move
    return obs, penalty, False
|
| 270 |
+
|
| 271 |
+
def _get_result_string(self) -> str:
|
| 272 |
+
"""Get the game result as a string."""
|
| 273 |
+
assert self._board is not None
|
| 274 |
+
|
| 275 |
+
if self._board.is_checkmate():
|
| 276 |
+
return "1-0" if not self._board.turn else "0-1"
|
| 277 |
+
return "1/2-1/2"
|
| 278 |
+
|
| 279 |
+
def _get_material_count(self) -> Dict[str, int]:
    """Total middlegame piece values (pawns through queens) for each side."""
    assert self._board is not None

    # count_pieces yields interleaved white/black counts per piece type,
    # in the order P, N, B, R, Q: [wp, bp, wn, bn, wb, bb, wr, br, wq, bq].
    counts = count_pieces(self._board)
    piece_order = (chess.PAWN, chess.KNIGHT, chess.BISHOP, chess.ROOK, chess.QUEEN)

    white_total = 0
    black_total = 0
    for idx, piece in enumerate(piece_order):
        value = MG_PIECE_VALUES[piece]
        white_total += counts[2 * idx] * value
        black_total += counts[2 * idx + 1] * value

    return {"white": white_total, "black": black_total}
|
| 303 |
+
|
| 304 |
+
def _make_opponent_move(self) -> None:
    """Make a move for the opponent using the configured strategy."""
    assert self._board is not None
    assert self._state is not None

    if not list(self._board.legal_moves):
        return  # No legal moves (game should be over)

    if self.opponent == "moonfish":
        # Use moonfish engine to find best move.
        # NOTE(review): assumes search_move(board, depth=...) returns a
        # chess.Move for the side to move — confirm against moonfish.lib.
        move = search_move(self._board, depth=self.opponent_depth)
    elif self.opponent == "random":
        # Pick a random legal move. Uses the module-level `random` RNG,
        # which is not seeded by reset(), so replies are not reproducible.
        move = random.choice(list(self._board.legal_moves))
    else:
        return  # No opponent configured

    # Execute opponent's move, mirroring the bookkeeping done in step().
    self._board.push(move)
    self._state.step_count += 1
    self._state.move_history.append(move.uci())
    self._state.current_player = "white" if self._board.turn else "black"
    self._state.fen = self._board.fen()
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|