moonfish_chess / examples /openenv_training.py
luccabb's picture
Upload folder using huggingface_hub
b5e858e verified
"""
OpenEnv Training Example
This example shows how to use the chess environment with the OpenEnv
client-server pattern, which is useful for:
- Distributed training across machines
- Isolated environment execution
- Integration with OpenEnv-compatible training frameworks
Usage:
# Terminal 1: Start the server
cd moonfish/rl
python -m uvicorn server.app:app --host 0.0.0.0 --port 8000
# Terminal 2: Run this training script
python examples/openenv_training.py
"""
import random
from moonfish.rl import ChessAction, make_env
def random_policy(legal_moves: list[str]) -> str:
"""Simple random policy for demonstration."""
return random.choice(legal_moves)
def train_with_remote_env():
"""
Training loop using the HTTP client (OpenEnv pattern).
This pattern is useful when:
- Environment runs on a different machine
- You need environment isolation (sandboxing)
- You're using OpenEnv-compatible training frameworks
"""
# Connect to the environment server
# For local testing, start the server first:
# python -m uvicorn moonfish.rl.server.app:app --port 8000
client = make_env("http://localhost:8000")
# Check server health
if not client.health():
print("Server not running. Start it with:")
print(" python -m uvicorn moonfish.rl.server.app:app --port 8000")
return
print("Connected to chess environment server")
print(f"Metadata: {client.metadata()}")
print()
# Training loop
num_episodes = 5
for episode in range(num_episodes):
# Reset environment
obs = client.reset()
episode_reward = 0.0
print(f"Episode {episode + 1}")
while not obs.done:
# Select action using policy
move = random_policy(obs.legal_moves)
action = ChessAction(move=move)
# Step environment
result = client.step(action)
obs = result.observation
episode_reward += result.reward
# Safety limit
state = client.state()
if state.step_count > 200:
print(" (truncated at 200 moves)")
break
print(
f" Moves: {client.state().step_count}, "
f"Result: {obs.result or 'ongoing'}, "
f"Reward: {episode_reward:.2f}"
)
# Cleanup
client.close()
print("\nTraining complete!")
def train_with_local_env():
"""
Training loop using local environment (no server needed).
This is simpler and faster for single-machine training.
"""
from moonfish.rl import ChessEnvironment
env = ChessEnvironment(opponent="random")
print("Training with local environment (random opponent)")
print()
num_episodes = 5
for episode in range(num_episodes):
obs = env.reset()
episode_reward = 0.0
while not obs.done:
move = random_policy(obs.legal_moves)
obs, reward, done = env.step(ChessAction(move=move))
episode_reward += reward
if env.state.step_count > 200:
break
print(
f"Episode {episode + 1}: "
f"Moves={env.state.step_count}, "
f"Result={obs.result or 'ongoing'}, "
f"Reward={episode_reward:.2f}"
)
env.close()
print("\nTraining complete!")
if __name__ == "__main__":
import sys
if "--remote" in sys.argv:
print("=== Remote Environment (OpenEnv HTTP Client) ===\n")
train_with_remote_env()
else:
print("=== Local Environment ===\n")
train_with_local_env()
print("\nTo test with HTTP client, run:")
print(
" 1. Start server: python -m uvicorn moonfish.rl.server.app:app --port 8000"
)
print(" 2. Run: python examples/openenv_training.py --remote")