""" OpenEnv Training Example This example shows how to use the chess environment with the OpenEnv client-server pattern, which is useful for: - Distributed training across machines - Isolated environment execution - Integration with OpenEnv-compatible training frameworks Usage: # Terminal 1: Start the server cd moonfish/rl python -m uvicorn server.app:app --host 0.0.0.0 --port 8000 # Terminal 2: Run this training script python examples/openenv_training.py """ import random from moonfish.rl import ChessAction, make_env def random_policy(legal_moves: list[str]) -> str: """Simple random policy for demonstration.""" return random.choice(legal_moves) def train_with_remote_env(): """ Training loop using the HTTP client (OpenEnv pattern). This pattern is useful when: - Environment runs on a different machine - You need environment isolation (sandboxing) - You're using OpenEnv-compatible training frameworks """ # Connect to the environment server # For local testing, start the server first: # python -m uvicorn moonfish.rl.server.app:app --port 8000 client = make_env("http://localhost:8000") # Check server health if not client.health(): print("Server not running. Start it with:") print(" python -m uvicorn moonfish.rl.server.app:app --port 8000") return print("Connected to chess environment server") print(f"Metadata: {client.metadata()}") print() # Training loop num_episodes = 5 for episode in range(num_episodes): # Reset environment obs = client.reset() episode_reward = 0.0 print(f"Episode {episode + 1}") while not obs.done: # Select action using policy move = random_policy(obs.legal_moves) action = ChessAction(move=move) # Step environment result = client.step(action) obs = result.observation episode_reward += result.reward # Safety limit state = client.state() if state.step_count > 200: print(" (truncated at 200 moves)") break print( f" Moves: {client.state().step_count}, " f"Result: {obs.result or 'ongoing'}, " f"Reward: {episode_reward:.2f}" ) # Cleanup client.close() print("\nTraining complete!") def train_with_local_env(): """ Training loop using local environment (no server needed). This is simpler and faster for single-machine training. """ from moonfish.rl import ChessEnvironment env = ChessEnvironment(opponent="random") print("Training with local environment (random opponent)") print() num_episodes = 5 for episode in range(num_episodes): obs = env.reset() episode_reward = 0.0 while not obs.done: move = random_policy(obs.legal_moves) obs, reward, done = env.step(ChessAction(move=move)) episode_reward += reward if env.state.step_count > 200: break print( f"Episode {episode + 1}: " f"Moves={env.state.step_count}, " f"Result={obs.result or 'ongoing'}, " f"Reward={episode_reward:.2f}" ) env.close() print("\nTraining complete!") if __name__ == "__main__": import sys if "--remote" in sys.argv: print("=== Remote Environment (OpenEnv HTTP Client) ===\n") train_with_remote_env() else: print("=== Local Environment ===\n") train_with_local_env() print("\nTo test with HTTP client, run:") print( " 1. Start server: python -m uvicorn moonfish.rl.server.app:app --port 8000" ) print(" 2. Run: python examples/openenv_training.py --remote")