File size: 3,892 Bytes
3e1f9da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e5572a6
3e1f9da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5e858e
 
 
 
 
3e1f9da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5e858e
 
 
 
 
 
3e1f9da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5e858e
 
 
3e1f9da
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"""
OpenEnv Training Example

This example shows how to use the chess environment with the OpenEnv
client-server pattern, which is useful for:
- Distributed training across machines
- Isolated environment execution
- Integration with OpenEnv-compatible training frameworks

Usage:
    # Terminal 1: Start the server
    cd moonfish/rl
    python -m uvicorn server.app:app --host 0.0.0.0 --port 8000

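    # Terminal 2: Run this training script against the server
    python examples/openenv_training.py --remote

    # Without --remote, the script runs the local (in-process) environment
    # and no server is needed.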
"""

import random
from moonfish.rl import ChessAction, make_env


def random_policy(legal_moves: list[str]) -> str:
    """Pick one entry of ``legal_moves`` at random.

    Demonstration-only stand-in for a real policy: the choice is delegated
    to ``random.choice``, so an empty ``legal_moves`` raises ``IndexError``.
    """
    chosen_move = random.choice(legal_moves)
    return chosen_move


def train_with_remote_env(
    *,
    base_url: str = "http://localhost:8000",
    num_episodes: int = 5,
    max_steps: int = 200,
) -> None:
    """
    Training loop using the HTTP client (OpenEnv pattern).

    This pattern is useful when:
    - Environment runs on a different machine
    - You need environment isolation (sandboxing)
    - You're using OpenEnv-compatible training frameworks

    Args:
        base_url: Address of the environment server passed to ``make_env``.
            For local testing, start the server first:
            ``python -m uvicorn moonfish.rl.server.app:app --port 8000``
        num_episodes: Number of episodes to play.
        max_steps: Safety cap on steps per episode; an episode that is still
            running once the server-reported ``step_count`` reaches this
            value is abandoned and reported as truncated.

    The client is always closed before returning, including on the early
    "server not running" exit and when a call raises.
    """
    client = make_env(base_url)

    # try/finally guarantees client.close() on every exit path.
    try:
        # Check server health before doing any work.
        if not client.health():
            print("Server not running. Start it with:")
            print("  python -m uvicorn moonfish.rl.server.app:app --port 8000")
            return

        print("Connected to chess environment server")
        print(f"Metadata: {client.metadata()}")
        print()

        for episode in range(num_episodes):
            # Each episode starts from a fresh reset of the environment.
            obs = client.reset()
            episode_reward = 0.0

            print(f"Episode {episode + 1}")

            while not obs.done:
                # Select an action using the policy and step the environment.
                move = random_policy(obs.legal_moves)
                result = client.step(ChessAction(move=move))
                obs = result.observation
                episode_reward += result.reward

                # Safety limit. Only a still-running game counts as
                # truncated, and `>=` stops at exactly max_steps steps so the
                # message matches the number of moves actually played.
                if not obs.done and client.state().step_count >= max_steps:
                    print(f"  (truncated at {max_steps} moves)")
                    break

            print(
                f"  Moves: {client.state().step_count}, "
                f"Result: {obs.result or 'ongoing'}, "
                f"Reward: {episode_reward:.2f}"
            )
    finally:
        # Cleanup
        client.close()

    print("\nTraining complete!")


def train_with_local_env(*, num_episodes: int = 5, max_steps: int = 200) -> None:
    """
    Training loop using local environment (no server needed).

    This is simpler and faster for single-machine training.

    Args:
        num_episodes: Number of episodes to play.
        max_steps: Safety cap on steps per episode; an episode that is still
            running once ``env.state.step_count`` reaches this value is
            abandoned (its result is then printed as 'ongoing' unless the
            observation already carries one).

    The environment is always closed before returning, even if an episode
    raises.
    """
    from moonfish.rl import ChessEnvironment

    env = ChessEnvironment(opponent="random")

    # try/finally guarantees env.close() even when a step or reset raises.
    try:
        print("Training with local environment (random opponent)")
        print()

        for episode in range(num_episodes):
            obs = env.reset()
            episode_reward = 0.0

            while not obs.done:
                move = random_policy(obs.legal_moves)
                # step() returns a 3-tuple; the loop condition reads obs.done,
                # so the third element is not needed here.
                obs, reward, _done = env.step(ChessAction(move=move))
                episode_reward += reward

                # Safety limit: `>=` stops at exactly max_steps steps, and a
                # finished game is left to end the loop normally.
                if not obs.done and env.state.step_count >= max_steps:
                    break

            print(
                f"Episode {episode + 1}: "
                f"Moves={env.state.step_count}, "
                f"Result={obs.result or 'ongoing'}, "
                f"Reward={episode_reward:.2f}"
            )
    finally:
        env.close()

    print("\nTraining complete!")


if __name__ == "__main__":
    import sys

    # The --remote flag selects the server-backed loop; local is the default.
    remote_requested = "--remote" in sys.argv

    if not remote_requested:
        print("=== Local Environment ===\n")
        train_with_local_env()
        # After the local demo, explain how to try the client/server variant.
        follow_up = (
            "\nTo test with HTTP client, run:",
            "  1. Start server: python -m uvicorn moonfish.rl.server.app:app --port 8000",
            "  2. Run: python examples/openenv_training.py --remote",
        )
        for line in follow_up:
            print(line)
    else:
        print("=== Remote Environment (OpenEnv HTTP Client) ===\n")
        train_with_remote_env()