luccabb committed on
Commit
3e1f9da
·
verified ·
1 Parent(s): 68a23df

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies (gcc is needed to build any C extensions)
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy the moonfish package and rl module
COPY . /app/

# Install dependencies.
# NOTE: the version specifiers must be quoted — unquoted, the shell form of
# RUN parses ">=" as an output redirection, so pip would receive only the
# bare package names and every version pin would be silently dropped
# (and stray files named "=1.10.0" etc. would be created).
RUN pip install --no-cache-dir \
    "chess>=1.10.0" \
    "fastapi>=0.100.0" \
    "uvicorn[standard]>=0.23.0" \
    "httpx>=0.24.0" \
    "pydantic>=2.0.0"

# Install moonfish from the local package
RUN pip install --no-cache-dir -e /app

# Expose port
EXPOSE 8000

# Run the server
ENV ENABLE_WEB_INTERFACE=true
CMD ["python", "-m", "uvicorn", "moonfish.rl.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
README.md CHANGED
@@ -1,10 +1,186 @@
1
  ---
2
  title: Moonfish Chess
3
- emoji: 🌖
4
- colorFrom: indigo
5
  colorTo: blue
6
  sdk: docker
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Moonfish Chess
3
+ emoji: ♟️
4
+ colorFrom: gray
5
  colorTo: blue
6
  sdk: docker
7
  pinned: false
8
+ license: mit
9
+ base_path: /web
10
  ---
11
 
12
+ # Chess OpenEnv
13
+
14
+ A chess environment for reinforcement learning, built on [moonfish](https://github.com/luccabb/moonfish) and compatible with the [OpenEnv](https://github.com/meta-pytorch/OpenEnv) framework.
15
+
16
+ ## Features
17
+
18
+ - **Full Chess Rules**: Legal move generation, checkmate/stalemate detection, draw conditions
19
+ - **Position Evaluation**: PeSTO evaluation function from moonfish for reward shaping
20
+ - **OpenEnv Compatible**: Standard `reset()`, `step()`, `state()` interface
21
+ - **Configurable Rewards**: Win/loss/draw payoffs, illegal move penalties, evaluation-based rewards
22
+ - **HTTP API**: FastAPI server for remote training and multi-agent setups
23
+ - **Containerized**: Docker support for reproducible deployments
24
+
25
+ ## Quick Start
26
+
27
+ ### Local Usage (No Server)
28
+
29
+ ```python
30
+ from moonfish.rl import ChessEnvironment, ChessAction
31
+
32
+ # Create environment
33
+ env = ChessEnvironment()
34
+
35
+ # Start a new game
36
+ obs = env.reset()
37
+ print(f"Legal moves: {obs.legal_moves}")
38
+
39
+ # Make a move
40
+ action = ChessAction(move="e2e4")
41
+ obs, reward, done = env.step(action)
42
+
43
+ print(f"FEN: {obs.fen}")
44
+ print(f"Reward: {reward}, Done: {done}")
45
+ ```
46
+
47
+ ### Client-Server Usage
48
+
49
+ Start the server:
50
+
51
+ ```bash
52
+ cd moonfish/rl
53
+ python -m uvicorn server.app:app --host 0.0.0.0 --port 8000
54
+ ```
55
+
56
+ Connect with the client:
57
+
58
+ ```python
59
+ from moonfish.rl import ChessEnvClient, ChessAction
60
+
61
+ client = ChessEnvClient("http://localhost:8000")
62
+
63
+ obs = client.reset()
64
+ result = client.step(ChessAction(move="e2e4"))
65
+ print(f"Reward: {result.reward}")
66
+
67
+ client.close()
68
+ ```
69
+
70
+ ## Data Models
71
+
72
+ ### ChessAction
73
+ ```python
74
+ @dataclass
75
+ class ChessAction:
76
+ move: str # UCI format: "e2e4", "e7e8q" (promotion)
77
+ ```
78
+
79
+ ### ChessObservation
80
+ ```python
81
+ @dataclass
82
+ class ChessObservation:
83
+ fen: str # Board state in FEN notation
84
+ legal_moves: List[str] # Available moves in UCI format
85
+ is_check: bool # Current player in check
86
+ done: bool # Game over
87
+ reward: Optional[float] # Terminal reward
88
+ result: Optional[str] # "1-0", "0-1", "1/2-1/2"
89
+ metadata: Dict[str, Any] # Evaluation, material, etc.
90
+ ```
91
+
92
+ ### ChessState
93
+ ```python
94
+ @dataclass
95
+ class ChessState:
96
+ episode_id: str # Unique game identifier
97
+ step_count: int # Half-moves played
98
+ current_player: str # "white" or "black"
99
+ fen: str # Current position
100
+ move_history: List[str] # All moves in UCI format
101
+ ```
102
+
103
+ ## Reward Configuration
104
+
105
+ ```python
106
+ from moonfish.rl import ChessEnvironment, RewardConfig
107
+
108
+ config = RewardConfig(
109
+ win=1.0, # Reward for winning
110
+ loss=-1.0, # Penalty for losing
111
+ draw=0.0, # Reward for draw
112
+ illegal_move=-0.1, # Penalty for illegal moves
113
+ use_evaluation=True, # Enable intermediate rewards
114
+ evaluation_scale=0.0001, # Scale for eval-based rewards
115
+ )
116
+
117
+ env = ChessEnvironment(reward_config=config)
118
+ ```
119
+
120
+ ## Docker
121
+
122
+ Build and run:
123
+
124
+ ```bash
125
+ docker build -t chess-openenv .
126
+ docker run -p 8000:8000 chess-openenv
127
+ ```
128
+
129
+ ## Integration with RL Frameworks
130
+
131
+ ### With TorchRL
132
+
133
+ ```python
134
+ from moonfish.rl import ChessEnvironment, ChessAction
135
+
136
+ class ChessTorchRLWrapper:
137
+ def __init__(self):
138
+ self.env = ChessEnvironment()
139
+
140
+ def reset(self):
141
+ obs = self.env.reset()
142
+ return self._obs_to_tensor(obs)
143
+
144
+ def step(self, action_idx):
145
+ move = self._idx_to_move(action_idx)
146
+ obs, reward, done = self.env.step(ChessAction(move=move))
147
+ return self._obs_to_tensor(obs), reward, done
148
+ ```
149
+
150
+ ### With OpenEnv Training Loop
151
+
152
+ ```python
153
+ from moonfish.rl import make_env, ChessAction
154
+ import random
155
+
156
+ client = make_env("http://localhost:8000")
157
+
158
+ for episode in range(100):
159
+ obs = client.reset()
160
+ episode_reward = 0
161
+
162
+ while not obs.done:
163
+ # Your policy here (random for demo)
164
+ move = random.choice(obs.legal_moves)
165
+ result = client.step(ChessAction(move=move))
166
+ obs = result.observation
167
+ episode_reward += result.reward
168
+
169
+ print(f"Episode {episode}: reward={episode_reward}")
170
+
171
+ client.close()
172
+ ```
173
+
174
+ ## API Endpoints
175
+
176
+ | Endpoint | Method | Description |
177
+ |----------|--------|-------------|
178
+ | `/health` | GET | Health check |
179
+ | `/metadata` | GET | Environment configuration |
180
+ | `/reset` | POST | Start new episode |
181
+ | `/step` | POST | Execute a move |
182
+ | `/state` | GET | Get episode metadata |
183
+
184
+ ## License
185
+
186
+ MIT - See the moonfish repository for full license details.
__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Chess OpenEnv - A chess environment for reinforcement learning."""

from .models import ChessAction, ChessObservation, ChessState, RewardConfig
from .client import ChessEnvClient, StepResult, make_env
from .server.chess_environment import ChessEnvironment

__version__ = "1.0.0"

# Public API, grouped as: data models, client helpers, environment.
__all__ = [
    "ChessAction",
    "ChessObservation",
    "ChessState",
    "RewardConfig",
    "ChessEnvClient",
    "StepResult",
    "make_env",
    "ChessEnvironment",
]
client.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Client for the Chess OpenEnv environment."""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import httpx
7
+
8
+ from .models import ChessAction, ChessObservation, ChessState
9
+
10
+
11
@dataclass
class StepResult:
    """Result from a step() call."""
    # Observation parsed from the server's /step response.
    observation: ChessObservation
    # Scalar reward reported by the server for this transition.
    reward: float
    # True when the server reports the episode has ended.
    done: bool
17
+
18
+
19
class ChessEnvClient:
    """
    HTTP client for the Chess OpenEnv environment.

    Wraps the server's REST endpoints (/reset, /step, /state, /metadata,
    /health) behind a small typed interface for RL training loops.

    Example usage:
        client = ChessEnvClient("http://localhost:8000")
        obs = client.reset()
        print(f"Legal moves: {obs.legal_moves}")

        result = client.step(ChessAction(move="e2e4"))
        print(f"Reward: {result.reward}, Done: {result.done}")

        state = client.state()
        print(f"Move count: {state.step_count}")

        client.close()
    """

    def __init__(self, base_url: str = "http://localhost:8000", timeout: float = 30.0):
        """
        Initialize the chess environment client.

        Args:
            base_url: URL of the chess environment server
            timeout: Request timeout in seconds
        """
        self.base_url = base_url.rstrip("/")
        self._client = httpx.Client(timeout=timeout)

    def _post_json(self, endpoint: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        # POST helper; raises for non-2xx responses, returns parsed JSON.
        response = self._client.post(f"{self.base_url}{endpoint}", json=payload)
        response.raise_for_status()
        return response.json()

    def _get_json(self, endpoint: str) -> Dict[str, Any]:
        # GET helper; raises for non-2xx responses, returns parsed JSON.
        response = self._client.get(f"{self.base_url}{endpoint}")
        response.raise_for_status()
        return response.json()

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        fen: Optional[str] = None,
    ) -> ChessObservation:
        """
        Reset the environment and start a new episode.

        Args:
            seed: Random seed (optional)
            episode_id: Unique episode identifier (optional)
            fen: Starting position in FEN notation (optional)

        Returns:
            Initial observation of the board state
        """
        # Only send the options the caller actually supplied.
        requested = {"seed": seed, "episode_id": episode_id, "fen": fen}
        payload = {key: value for key, value in requested.items() if value is not None}
        return self._parse_observation(self._post_json("/reset", payload))

    def step(self, action: ChessAction) -> StepResult:
        """
        Execute a move in the environment.

        Args:
            action: The chess action (move in UCI format)

        Returns:
            StepResult with observation, reward, and done flag
        """
        data = self._post_json("/step", {"move": action.move})
        return StepResult(
            observation=self._parse_observation(data["observation"]),
            reward=data["reward"],
            done=data["done"],
        )

    def state(self) -> ChessState:
        """
        Get the current episode state.

        Returns:
            Current episode state with metadata
        """
        data = self._get_json("/state")
        return ChessState(
            episode_id=data["episode_id"],
            step_count=data["step_count"],
            current_player=data["current_player"],
            fen=data["fen"],
            move_history=data.get("move_history", []),
        )

    def metadata(self) -> Dict[str, Any]:
        """
        Get environment metadata.

        Returns:
            Dictionary with environment configuration
        """
        return self._get_json("/metadata")

    def health(self) -> bool:
        """
        Check if the server is healthy.

        Returns:
            True if server is responding
        """
        # Deliberately swallows connection errors: an unreachable server
        # is reported as "not healthy" rather than raising.
        try:
            response = self._client.get(f"{self.base_url}/health")
        except Exception:
            return False
        return response.status_code == 200

    def close(self) -> None:
        """Close the HTTP client."""
        self._client.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def _parse_observation(self, data: Dict[str, Any]) -> ChessObservation:
        """Parse observation from JSON response."""
        return ChessObservation(
            fen=data["fen"],
            legal_moves=data["legal_moves"],
            is_check=data.get("is_check", False),
            done=data.get("done", False),
            reward=data.get("reward"),
            result=data.get("result"),
            metadata=data.get("metadata", {}),
        )
167
+
168
+
169
# Convenience function for quick usage
def make_env(base_url: str = "http://localhost:8000") -> ChessEnvClient:
    """
    Create a chess environment client.

    Args:
        base_url: URL of the chess environment server

    Returns:
        ChessEnvClient instance
    """
    client = ChessEnvClient(base_url)
    return client
examples/__init__.py ADDED
File without changes
examples/basic_usage.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Basic usage example for the Chess OpenEnv environment.
3
+
4
+ This example shows how to use the chess environment both locally
5
+ (without a server) and via the HTTP client.
6
+ """
7
+
8
+ import random
9
+
10
+ from moonfish.rl import ChessAction, ChessEnvironment, RewardConfig
11
+
12
+
13
def play_random_game():
    """Play a game with random moves to demonstrate the environment."""
    print("=== Playing a random game ===\n")

    # Create environment
    env = ChessEnvironment()

    # Reset to start a new game
    obs = env.reset(episode_id="random_game_001")

    print(f"Initial position: {obs.fen}")
    print(f"Legal moves: {len(obs.legal_moves)} available")
    print()

    move_count = 0
    total_reward = 0.0

    while not obs.done:
        # Pick a random legal move
        move = random.choice(obs.legal_moves)
        action = ChessAction(move=move)

        # Execute the move
        obs, reward, done = env.step(action)
        total_reward += reward
        move_count += 1

        # Only the first five moves and the terminal move are printed,
        # to keep the demo output readable.
        if move_count <= 5 or done:
            print(f"Move {move_count}: {move}")
            print(f"  FEN: {obs.fen}")
            print(f"  Check: {obs.is_check}, Reward: {reward}")
            if move_count == 5 and not done:
                print("  ... (continuing)")
            print()

    print(f"\nGame finished after {move_count} moves")
    print(f"Result: {obs.result}")
    print(f"Total reward: {total_reward}")

    # Check final state
    state = env.state
    print(f"Episode ID: {state.episode_id}")
    # Only the first ten half-moves are shown.
    print(f"Move history: {state.move_history[:10]}...")

    env.close()
+ env.close()
58
+
59
+
60
def play_specific_opening():
    """Demonstrate playing specific moves (Italian Game opening)."""
    print("\n=== Playing the Italian Game opening ===\n")

    env = ChessEnvironment()
    obs = env.reset()

    opening_moves = ["e2e4", "e7e5", "g1f3", "b8c6", "f1c4"]

    # Play each book move in order; number the output from 1.
    for ply, move in enumerate(opening_moves, start=1):
        obs, reward, done = env.step(ChessAction(move=move))
        print(f"{ply}. {move} -> Check: {obs.is_check}")

    print(f"\nPosition after opening: {obs.fen}")
    print(f"Legal moves for Black: {len(obs.legal_moves)}")
    print(f"Material: {obs.metadata.get('material', {})}")

    env.close()
79
+
80
+
81
def demonstrate_illegal_move():
    """Show how illegal moves are handled.

    An illegal move does not end the game: the environment returns a
    penalty reward and records an error in the observation metadata.
    """
    print("\n=== Handling illegal moves ===\n")

    env = ChessEnvironment()
    obs = env.reset()

    # Try an illegal move (a pawn cannot advance three squares).
    illegal_action = ChessAction(move="e2e5")
    obs, reward, done = env.step(illegal_action)

    # Derive the printed move from the action itself so the message can
    # never drift out of sync with the move actually attempted (the
    # original hard-coded "e2e5" in a placeholder-less f-string).
    print(f"Attempted illegal move: {illegal_action.move}")
    print(f"Reward: {reward}")  # Should be negative
    print(f"Error: {obs.metadata.get('error', 'None')}")
    print(f"Done: {done}")  # Game continues

    env.close()
98
+
99
+
100
def with_evaluation_rewards():
    """Show evaluation-based intermediate rewards."""
    print("\n=== Evaluation-based rewards ===\n")

    # Scale the centipawn evaluations down into a small per-move signal.
    config = RewardConfig(
        use_evaluation=True,
        evaluation_scale=0.0001,
    )
    env = ChessEnvironment(reward_config=config)
    obs = env.reset()

    # A short line where White wins a pawn, so the evaluation shifts.
    for move in ["e2e4", "d7d5", "e4d5"]:
        obs, reward, done = env.step(ChessAction(move=move))
        eval_score = obs.metadata.get("evaluation", 0)
        print(f"Move: {move}, Reward: {reward:.4f}, Eval: {eval_score:.1f}")

    env.close()
122
+
123
+
124
if __name__ == "__main__":
    # Run the demos in sequence; each one creates and closes its own env.
    play_random_game()
    play_specific_opening()
    demonstrate_illegal_move()
    with_evaluation_rewards()
examples/openenv_training.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OpenEnv Training Example
3
+
4
+ This example shows how to use the chess environment with the OpenEnv
5
+ client-server pattern, which is useful for:
6
+ - Distributed training across machines
7
+ - Isolated environment execution
8
+ - Integration with OpenEnv-compatible training frameworks
9
+
10
+ Usage:
11
+ # Terminal 1: Start the server
12
+ cd moonfish/rl
13
+ python -m uvicorn server.app:app --host 0.0.0.0 --port 8000
14
+
15
+ # Terminal 2: Run this training script
16
+ python examples/openenv_training.py
17
+ """
18
+
19
+ import random
20
+ from moonfish.rl import ChessEnvClient, ChessAction, make_env
21
+
22
+
23
def random_policy(legal_moves: list[str]) -> str:
    """Demo policy: pick uniformly at random among the legal moves."""
    chosen = random.choice(legal_moves)
    return chosen
26
+
27
+
28
def train_with_remote_env():
    """
    Training loop using the HTTP client (OpenEnv pattern).

    This pattern is useful when:
    - Environment runs on a different machine
    - You need environment isolation (sandboxing)
    - You're using OpenEnv-compatible training frameworks
    """
    # Connect to the environment server
    # For local testing, start the server first:
    #   python -m uvicorn moonfish.rl.server.app:app --port 8000
    client = make_env("http://localhost:8000")

    # Check server health
    if not client.health():
        print("Server not running. Start it with:")
        print("  python -m uvicorn moonfish.rl.server.app:app --port 8000")
        return

    print("Connected to chess environment server")
    print(f"Metadata: {client.metadata()}")
    print()

    # Training loop
    num_episodes = 5

    for episode in range(num_episodes):
        # Reset environment
        obs = client.reset()
        episode_reward = 0.0
        # Safety limit is tracked locally: the original implementation
        # called client.state() after every move, doubling the HTTP
        # round trips just to read the move count.
        moves_played = 0

        print(f"Episode {episode + 1}")

        while not obs.done:
            # Select action using policy
            move = random_policy(obs.legal_moves)
            action = ChessAction(move=move)

            # Step environment
            result = client.step(action)
            obs = result.observation
            episode_reward += result.reward
            moves_played += 1

            # Safety limit (counts client-issued moves)
            if moves_played > 200:
                print("  (truncated at 200 moves)")
                break

        print(f"  Moves: {client.state().step_count}, "
              f"Result: {obs.result or 'ongoing'}, "
              f"Reward: {episode_reward:.2f}")

    # Cleanup
    client.close()
    print("\nTraining complete!")
85
+
86
+
87
def train_with_local_env():
    """
    Training loop using local environment (no server needed).

    This is simpler and faster for single-machine training.
    """
    from moonfish.rl import ChessEnvironment

    env = ChessEnvironment(opponent="random")

    print("Training with local environment (random opponent)")
    print()

    num_episodes = 5

    for episode_index in range(num_episodes):
        obs = env.reset()
        episode_reward = 0.0

        while not obs.done:
            chosen_move = random_policy(obs.legal_moves)
            obs, step_reward, _done = env.step(ChessAction(move=chosen_move))
            episode_reward += step_reward

            # Safety limit: bail out of runaway games.
            if env.state.step_count > 200:
                break

        print(f"Episode {episode_index + 1}: "
              f"Moves={env.state.step_count}, "
              f"Result={obs.result or 'ongoing'}, "
              f"Reward={episode_reward:.2f}")

    env.close()
    print("\nTraining complete!")
121
+
122
+
123
if __name__ == "__main__":
    import sys

    # Default to the local in-process environment; pass --remote to
    # exercise the HTTP client against a running server instead.
    if "--remote" in sys.argv:
        print("=== Remote Environment (OpenEnv HTTP Client) ===\n")
        train_with_remote_env()
    else:
        print("=== Local Environment ===\n")
        train_with_local_env()
        print("\nTo test with HTTP client, run:")
        print("  1. Start server: python -m uvicorn moonfish.rl.server.app:app --port 8000")
        print("  2. Run: python examples/openenv_training.py --remote")
models.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data models for the Chess OpenEnv environment."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Dict, List, Optional, Union
5
+
6
+
7
@dataclass
class ChessAction:
    """A single chess move submitted to the environment.

    Attributes:
        move: Move in UCI notation, e.g. "e2e4", or "e7e8q" for a promotion.
    """
    move: str
16
+
17
+
18
@dataclass
class ChessObservation:
    """Snapshot of the chess environment as seen by the agent.

    Attributes:
        fen: Board position in FEN notation.
        legal_moves: Legal moves in UCI format for the side to move.
        is_check: True when the side to move is in check.
        done: True once the episode has ended.
        reward: Terminal reward (win/loss/draw); None while the game runs.
        result: Final score string ("1-0", "0-1", "1/2-1/2") when over.
        metadata: Extra per-position information (evaluation, material, ...).
    """
    fen: str
    legal_moves: List[str]
    is_check: bool = False
    done: bool = False
    reward: Optional[float] = None
    result: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
39
+
40
+
41
@dataclass
class ChessState:
    """Per-episode bookkeeping for the chess environment.

    Attributes:
        episode_id: Unique identifier for the current episode.
        step_count: Half-moves played so far in this episode.
        current_player: Side to move, "white" or "black".
        fen: Current position in FEN notation.
        move_history: Moves played so far, in UCI format.
    """
    episode_id: str
    step_count: int
    current_player: str
    fen: str
    move_history: List[str] = field(default_factory=list)
58
+
59
+
60
@dataclass
class RewardConfig:
    """Reward-shaping knobs for the chess environment.

    Attributes:
        win: Reward for winning the game.
        loss: Reward for losing the game.
        draw: Reward for drawing the game.
        illegal_move: Penalty applied when an illegal move is attempted.
        use_evaluation: Include position evaluation in per-move rewards.
        evaluation_scale: Multiplier applied to evaluation-based rewards.
    """
    win: float = 1.0
    loss: float = -1.0
    draw: float = 0.0
    illegal_move: float = -0.1
    use_evaluation: bool = False
    evaluation_scale: float = 0.001
openenv.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: moonfish_chess
3
+ type: space
4
+ runtime: fastapi
5
+ app: server.app:app
6
+ port: 8000
outputs/.gitkeep ADDED
File without changes
pyproject.toml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "moonfish-chess-env"
3
+ version = "1.0.0"
4
+ description = "Chess RL environment using moonfish engine - OpenEnv compatible"
5
+ requires-python = ">=3.10"
6
+
7
+ dependencies = [
8
+ "chess>=1.10.0",
9
+ "fastapi>=0.100.0",
10
+ "uvicorn[standard]>=0.23.0",
11
+ "httpx>=0.24.0",
12
+ "pydantic>=2.0.0",
13
+ "openenv>=0.1.0",
14
+ ]
15
+
16
+ [project.scripts]
17
+ server = "server.app:main"
18
+
19
+ [build-system]
20
+ requires = ["hatchling"]
21
+ build-backend = "hatchling.build"
server/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
"""Chess OpenEnv server module."""

# Re-export the environment so callers can do
# `from moonfish.rl.server import ChessEnvironment`.
from .chess_environment import ChessEnvironment

__all__ = ["ChessEnvironment"]
server/app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI server for the Chess OpenEnv environment."""
2
+
3
+ from typing import Any, Dict, Optional
4
+ from dataclasses import asdict
5
+
6
+ from fastapi import FastAPI, HTTPException
7
+ from pydantic import BaseModel
8
+
9
+ from ..models import ChessAction, RewardConfig
10
+ from .chess_environment import ChessEnvironment
11
+
12
+
13
# Pydantic models for API requests/responses
class ResetRequest(BaseModel):
    """Body of POST /reset; every field is optional."""
    seed: Optional[int] = None
    episode_id: Optional[str] = None
    fen: Optional[str] = None  # custom start position; standard start if omitted


class StepRequest(BaseModel):
    """Body of POST /step."""
    move: str  # UCI move, e.g. "e2e4"


class ObservationResponse(BaseModel):
    """JSON shape of an observation returned by /reset and /step."""
    fen: str
    legal_moves: list[str]
    is_check: bool = False
    done: bool = False
    reward: Optional[float] = None
    result: Optional[str] = None
    # NOTE(review): a mutable `{}` default is safe on a pydantic model —
    # pydantic copies field defaults per instance — but Field(default_factory=dict)
    # would make that explicit; confirm pydantic v2 is in use.
    metadata: Dict[str, Any] = {}


class StepResponse(BaseModel):
    """Response of POST /step: observation plus scalar reward/done."""
    observation: ObservationResponse
    reward: float
    done: bool


class StateResponse(BaseModel):
    """Response of GET /state: episode bookkeeping."""
    episode_id: str
    step_count: int  # half-moves played so far
    current_player: str  # "white" or "black"
    fen: str
    move_history: list[str]  # UCI moves in order
46
+
47
+
48
# Create FastAPI app
app = FastAPI(
    title="Chess OpenEnv",
    description="Chess environment for reinforcement learning using moonfish",
    version="1.0.0",
)

# Global environment instance (for single-player mode)
# For multi-player, you'd want a session manager
_env: Optional[ChessEnvironment] = None


def get_env() -> ChessEnvironment:
    """Get or create environment instance."""
    # Lazily constructed so importing this module stays cheap; every
    # endpoint shares this single environment (no per-client sessions).
    global _env
    if _env is None:
        _env = ChessEnvironment()
    return _env
66
+
67
+
68
@app.get("/health")
def health():
    """Liveness probe; returns a constant OK payload."""
    return {"status": "ok"}


@app.get("/metadata")
def metadata():
    """Expose the environment's configuration to clients."""
    env = get_env()
    return env.get_metadata()
78
+
79
+
80
def _to_observation_response(obs) -> ObservationResponse:
    """Convert a ChessObservation dataclass into the API response model.

    Shared by /reset and /step, which previously duplicated this
    field-by-field construction.
    """
    return ObservationResponse(
        fen=obs.fen,
        legal_moves=obs.legal_moves,
        is_check=obs.is_check,
        done=obs.done,
        reward=obs.reward,
        result=obs.result,
        metadata=obs.metadata,
    )


@app.post("/reset", response_model=ObservationResponse)
def reset(request: ResetRequest):
    """Reset the environment and start a new episode."""
    env = get_env()
    obs = env.reset(
        seed=request.seed,
        episode_id=request.episode_id,
        fen=request.fen,
    )
    return _to_observation_response(obs)


@app.post("/step", response_model=StepResponse)
def step(request: StepRequest):
    """Execute a move and return the resulting observation/reward/done."""
    env = get_env()

    try:
        action = ChessAction(move=request.move)
        obs, reward, done = env.step(action)
    except RuntimeError as e:
        # Surface environment errors (e.g. no active episode) as 400s,
        # preserving the original exception as the cause.
        raise HTTPException(status_code=400, detail=str(e)) from e

    return StepResponse(
        observation=_to_observation_response(obs),
        reward=reward,
        done=done,
    )
124
+
125
+
126
@app.get("/state", response_model=StateResponse)
def state():
    """Return bookkeeping metadata for the current episode."""
    env = get_env()
    try:
        snapshot = env.state
    except RuntimeError as e:
        # No active episode yet -> client error, not a server fault.
        raise HTTPException(status_code=400, detail=str(e))

    return StateResponse(
        episode_id=snapshot.episode_id,
        step_count=snapshot.step_count,
        current_player=snapshot.current_player,
        fen=snapshot.fen,
        move_history=snapshot.move_history,
    )
142
+
143
+
144
def main():
    """Entry point for running the server."""
    # Imported lazily so the module can be imported without uvicorn installed.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)


if __name__ == "__main__":
    main()
server/chess_environment.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Chess environment for OpenEnv using moonfish."""
2
+
3
+ import random
4
+ import uuid
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ import chess
8
+
9
+ from moonfish.psqt import board_evaluation, MG_PIECE_VALUES, count_pieces, get_phase
10
+ from moonfish.lib import search_move
11
+ from ..models import ChessAction, ChessObservation, ChessState, RewardConfig
12
+
13
+
14
class ChessEnvironment:
    """
    Chess environment implementing the OpenEnv interface.

    Uses python-chess for game logic and moonfish for position evaluation.
    Designed for RL training where an agent plays as one color against an
    opponent (which can be random, the moonfish engine, or self-play).
    """

    def __init__(
        self,
        reward_config: Optional[RewardConfig] = None,
        max_moves: int = 500,
        agent_color: Optional[bool] = None,  # None = alternate, True = White, False = Black
        opponent: Optional[str] = None,  # None = self-play, "moonfish", or "random"
        opponent_depth: int = 2,  # Search depth for the moonfish opponent
    ):
        """
        Initialize the chess environment.

        Args:
            reward_config: Configuration for reward shaping.
            max_moves: Maximum half-moves before a forced draw (prevents
                infinite games).
            agent_color: Which color the RL agent plays (None = alternates
                per episode, keyed off the episode id).
            opponent: Opponent type - None (self-play), "moonfish", or "random".
            opponent_depth: Search depth when using moonfish as the opponent.
        """
        self.reward_config = reward_config or RewardConfig()
        self.max_moves = max_moves
        self.agent_color_setting = agent_color
        self.opponent = opponent
        self.opponent_depth = opponent_depth

        # Populated on reset().
        self._board: Optional[chess.Board] = None
        self._state: Optional[ChessState] = None
        self._agent_color: bool = chess.WHITE

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        fen: Optional[str] = None,
        **kwargs
    ) -> ChessObservation:
        """
        Initialize a new chess game episode.

        Args:
            seed: Random seed (unused for now; chess itself is deterministic).
            episode_id: Unique identifier for this episode.
            fen: Optional starting position in FEN notation.

        Returns:
            Initial observation of the board state.
        """
        # Create a new board, from the given FEN if provided.
        self._board = chess.Board(fen) if fen else chess.Board()

        # Determine which color the agent plays this episode.
        if self.agent_color_setting is None:
            if episode_id:
                # Derive the color from a *stable* digest of the episode id.
                # The builtin hash() is randomized per process
                # (PYTHONHASHSEED), so it would assign different colors to
                # the same episode id across runs/workers.
                import hashlib

                digest = hashlib.sha256(episode_id.encode("utf-8")).digest()
                self._agent_color = digest[0] % 2 == 0
            else:
                self._agent_color = chess.WHITE
        else:
            self._agent_color = self.agent_color_setting

        # Initialize episode bookkeeping.
        self._state = ChessState(
            episode_id=episode_id or uuid.uuid4().hex,
            step_count=0,
            current_player="white" if self._board.turn else "black",
            fen=self._board.fen(),
            move_history=[],
        )

        # If the agent plays Black and an opponent is configured, the
        # opponent (White) moves first so the agent always acts next.
        if self.opponent is not None and self._agent_color == chess.BLACK:
            self._make_opponent_move()

        return self._get_observation()

    def step(
        self,
        action: ChessAction,
        timeout_s: Optional[float] = None,
        **kwargs
    ) -> Tuple[ChessObservation, float, bool]:
        """
        Execute a chess move and return the resulting state.

        Args:
            action: The move to make in UCI format (e.g., "e2e4").
            timeout_s: Unused timeout parameter (kept for interface parity).

        Returns:
            Tuple of (observation, reward, done).

        Raises:
            RuntimeError: If called before reset().
        """
        if self._board is None or self._state is None:
            raise RuntimeError("Environment not initialized. Call reset() first.")

        # Parse the move; malformed UCI is penalized, not fatal.
        try:
            move = chess.Move.from_uci(action.move)
        except ValueError:
            return self._handle_illegal_move(f"Invalid move format: {action.move}")

        # Reject moves that are well-formed but not legal in this position.
        if move not in self._board.legal_moves:
            return self._handle_illegal_move(f"Illegal move: {action.move}")

        # Execute the agent's move.
        self._apply_move(move)

        # Calculate reward and check for game end.
        reward, done = self._calculate_reward_and_done()

        # If the game is not over and an opponent is configured, let it reply.
        if not done and self.opponent is not None:
            self._make_opponent_move()
            # Re-evaluate after the opponent's reply.
            # _calculate_reward_and_done() already scores outcomes from the
            # AGENT's perspective, so the terminal reward is used as-is.
            # (Negating it here as a naive "zero-sum" adjustment would invert
            # win/loss whenever the opponent delivers checkmate.)
            opp_reward, done = self._calculate_reward_and_done()
            if done:
                reward = opp_reward

        observation = self._get_observation(done=done, reward=reward if done else None)

        return observation, reward, done

    @property
    def state(self) -> ChessState:
        """Return the current episode state.

        Raises:
            RuntimeError: If called before reset().
        """
        if self._state is None:
            raise RuntimeError("Environment not initialized. Call reset() first.")
        return self._state

    def close(self) -> None:
        """Clean up resources."""
        self._board = None
        self._state = None

    def get_metadata(self) -> Dict[str, Any]:
        """Return environment metadata (name, version, limits, reward config)."""
        return {
            "name": "chess",
            "version": "1.0.0",
            "max_moves": self.max_moves,
            "reward_config": {
                "win": self.reward_config.win,
                "loss": self.reward_config.loss,
                "draw": self.reward_config.draw,
                "illegal_move": self.reward_config.illegal_move,
                "use_evaluation": self.reward_config.use_evaluation,
                "evaluation_scale": self.reward_config.evaluation_scale,
            },
        }

    def _apply_move(self, move: chess.Move) -> None:
        """Push *move* onto the board and update episode bookkeeping."""
        assert self._board is not None
        assert self._state is not None

        self._board.push(move)
        self._state.step_count += 1
        self._state.move_history.append(move.uci())
        self._state.current_player = "white" if self._board.turn else "black"
        self._state.fen = self._board.fen()

    def _get_observation(
        self,
        done: bool = False,
        reward: Optional[float] = None,
        result: Optional[str] = None,
        error: Optional[str] = None,
    ) -> ChessObservation:
        """Build an observation from the current board state.

        Args:
            done: Whether the episode has ended.
            reward: Final reward to expose (only set on terminal observations).
            result: Game result string; derived automatically when done.
            error: Optional error message (e.g., for illegal-move attempts).
        """
        assert self._board is not None

        legal_moves = [move.uci() for move in self._board.legal_moves]

        metadata: Dict[str, Any] = {}

        # Add a static evaluation of the position if configured.
        if self.reward_config.use_evaluation:
            metadata["evaluation"] = board_evaluation(self._board)

        # Material balance in moonfish middlegame piece values.
        metadata["material"] = self._get_material_count()

        # Game phase (0 = opening, 256 = endgame) plus move counters.
        metadata["phase"] = get_phase(self._board)
        metadata["fullmove_number"] = self._board.fullmove_number
        metadata["halfmove_clock"] = self._board.halfmove_clock

        if error:
            metadata["error"] = error

        # Derive the result string for terminal observations.
        if done and result is None:
            result = self._get_result_string()

        return ChessObservation(
            fen=self._board.fen(),
            legal_moves=legal_moves,
            is_check=self._board.is_check(),
            done=done,
            reward=reward,
            result=result,
            metadata=metadata,
        )

    def _calculate_reward_and_done(self) -> Tuple[float, bool]:
        """Calculate the agent-perspective reward and whether the episode ended."""
        assert self._board is not None

        # Terminal: checkmate. The side to move is the one checkmated,
        # so the previous mover is the winner.
        if self._board.is_checkmate():
            winner = not self._board.turn
            if winner == self._agent_color:
                return self.reward_config.win, True
            return self.reward_config.loss, True

        # Terminal: the various draw conditions.
        if self._board.is_stalemate():
            return self.reward_config.draw, True

        if self._board.is_insufficient_material():
            return self.reward_config.draw, True

        if self._board.is_fifty_moves():
            return self.reward_config.draw, True

        if self._board.is_repetition(3):
            return self.reward_config.draw, True

        # Terminal: hard half-move cap to prevent unbounded games.
        if self._state and self._state.step_count >= self.max_moves:
            return self.reward_config.draw, True

        # Game continues.
        reward = 0.0

        # Optional evaluation-based shaping reward.
        # NOTE(review): the sign flip below assumes board_evaluation() is
        # side-to-move relative — confirm against moonfish.psqt.
        if self.reward_config.use_evaluation:
            eval_score = board_evaluation(self._board)
            if self._board.turn != self._agent_color:
                eval_score = -eval_score
            reward = eval_score * self.reward_config.evaluation_scale

        return reward, False

    def _handle_illegal_move(self, error_msg: str) -> Tuple[ChessObservation, float, bool]:
        """Handle an illegal move attempt: penalize, keep the episode alive."""
        observation = self._get_observation(done=False, error=error_msg)
        return observation, self.reward_config.illegal_move, False

    def _get_result_string(self) -> str:
        """Get the game result as a PGN-style string ("1-0", "0-1", "1/2-1/2")."""
        assert self._board is not None

        if self._board.is_checkmate():
            # Side to move is mated; the other side scored the point.
            return "1-0" if not self._board.turn else "0-1"
        return "1/2-1/2"

    def _get_material_count(self) -> Dict[str, int]:
        """Count material for both sides using moonfish middlegame piece values."""
        assert self._board is not None

        # count_pieces returns [wp, bp, wn, bn, wb, bb, wr, br, wq, bq]
        pieces = count_pieces(self._board)
        wp, bp, wn, bn, wb, bb, wr, br, wq, bq = pieces

        white = (
            wp * MG_PIECE_VALUES[chess.PAWN]
            + wn * MG_PIECE_VALUES[chess.KNIGHT]
            + wb * MG_PIECE_VALUES[chess.BISHOP]
            + wr * MG_PIECE_VALUES[chess.ROOK]
            + wq * MG_PIECE_VALUES[chess.QUEEN]
        )
        black = (
            bp * MG_PIECE_VALUES[chess.PAWN]
            + bn * MG_PIECE_VALUES[chess.KNIGHT]
            + bb * MG_PIECE_VALUES[chess.BISHOP]
            + br * MG_PIECE_VALUES[chess.ROOK]
            + bq * MG_PIECE_VALUES[chess.QUEEN]
        )

        return {"white": white, "black": black}

    def _make_opponent_move(self) -> None:
        """Make a move for the opponent using the configured strategy."""
        assert self._board is not None
        assert self._state is not None

        if not any(self._board.legal_moves):
            return  # No legal moves (game should be over)

        if self.opponent == "moonfish":
            # Use the moonfish engine to find the best move.
            move = search_move(self._board, depth=self.opponent_depth)
        elif self.opponent == "random":
            # Pick a uniformly random legal move.
            move = random.choice(list(self._board.legal_moves))
        else:
            return  # No opponent configured (self-play)

        self._apply_move(move)
uv.lock ADDED
The diff for this file is too large to render. See raw diff