Spaces:

luccabb
/

moonfish_chess

Runtime error

luccabb committed on Jan 22

Commit

b5e858e

verified ·

1 Parent(s): e5572a6

Upload folder using huggingface_hub

Files changed (5) hide show

client.py CHANGED Viewed

@@ -11,6 +11,7 @@ from .models import ChessAction, ChessObservation, ChessState
 @dataclass
 class StepResult:
     """Result from a step() call."""
     observation: ChessObservation
     reward: float
     done: bool

 @dataclass
 class StepResult:
     """Result from a step() call."""
     observation: ChessObservation
     reward: float
     done: bool

examples/openenv_training.py CHANGED Viewed

@@ -75,9 +75,11 @@ def train_with_remote_env():
                 print("  (truncated at 200 moves)")
                 break
-        print(f"  Moves: {client.state().step_count}, "
-              f"Result: {obs.result or 'ongoing'}, "
-              f"Reward: {episode_reward:.2f}")
     # Cleanup
     client.close()
@@ -111,10 +113,12 @@ def train_with_local_env():
             if env.state.step_count > 200:
                 break
-        print(f"Episode {episode + 1}: "
-              f"Moves={env.state.step_count}, "
-              f"Result={obs.result or 'ongoing'}, "
-              f"Reward={episode_reward:.2f}")
     env.close()
     print("\nTraining complete!")
@@ -130,5 +134,7 @@ if __name__ == "__main__":
         print("=== Local Environment ===\n")
         train_with_local_env()
         print("\nTo test with HTTP client, run:")
-        print("  1. Start server: python -m uvicorn moonfish.rl.server.app:app --port 8000")
         print("  2. Run: python examples/openenv_training.py --remote")

                 print("  (truncated at 200 moves)")
                 break
+        print(
+            f"  Moves: {client.state().step_count}, "
+            f"Result: {obs.result or 'ongoing'}, "
+            f"Reward: {episode_reward:.2f}"
+        )
     # Cleanup
     client.close()
             if env.state.step_count > 200:
                 break
+        print(
+            f"Episode {episode + 1}: "
+            f"Moves={env.state.step_count}, "
+            f"Result={obs.result or 'ongoing'}, "
+            f"Reward={episode_reward:.2f}"
+        )
     env.close()
     print("\nTraining complete!")
         print("=== Local Environment ===\n")
         train_with_local_env()
         print("\nTo test with HTTP client, run:")
+        print(
+            "  1. Start server: python -m uvicorn moonfish.rl.server.app:app --port 8000"
+        )
         print("  2. Run: python examples/openenv_training.py --remote")

models.py CHANGED Viewed

@@ -12,6 +12,7 @@ class ChessAction:
     Attributes:
         move: UCI format move string (e.g., "e2e4", "e7e8q" for promotion)
     """
     move: str
@@ -29,6 +30,7 @@ class ChessObservation:
         result: Game result string if game is over (e.g., "1-0", "0-1", "1/2-1/2")
         metadata: Additional information about the position
     """
     fen: str
     legal_moves: List[str]
     is_check: bool = False
@@ -50,6 +52,7 @@ class ChessState:
         fen: Current position in FEN notation
         move_history: List of moves played in UCI format
     """
     episode_id: str
     step_count: int
     current_player: str
@@ -70,6 +73,7 @@ class RewardConfig:
         use_evaluation: Whether to include position evaluation in rewards
         evaluation_scale: Scale factor for evaluation-based rewards
     """
     win: float = 1.0
     loss: float = -1.0
     draw: float = 0.0

     Attributes:
         move: UCI format move string (e.g., "e2e4", "e7e8q" for promotion)
     """
     move: str
         result: Game result string if game is over (e.g., "1-0", "0-1", "1/2-1/2")
         metadata: Additional information about the position
     """
     fen: str
     legal_moves: List[str]
     is_check: bool = False
         fen: Current position in FEN notation
         move_history: List of moves played in UCI format
     """
     episode_id: str
     step_count: int
     current_player: str
         use_evaluation: Whether to include position evaluation in rewards
         evaluation_scale: Scale factor for evaluation-based rewards
     """
     win: float = 1.0
     loss: float = -1.0
     draw: float = 0.0

server/app.py CHANGED Viewed

@@ -193,6 +193,7 @@ def state():
 def main():
     """Entry point for running the server."""
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)

 def main():
     """Entry point for running the server."""
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)

server/chess_environment.py CHANGED Viewed

@@ -24,8 +24,12 @@ class ChessEnvironment:
         self,
         reward_config: Optional[RewardConfig] = None,
         max_moves: int = 500,
-        agent_color: Optional[bool] = None,  # None = alternate, True = White, False = Black
-        opponent: Optional[str] = None,  # None = self-play, "moonfish" = moonfish engine, "random" = random
         opponent_depth: int = 2,  # Search depth for moonfish opponent
     ):
         """
@@ -54,7 +58,7 @@ class ChessEnvironment:
         seed: Optional[int] = None,
         episode_id: Optional[str] = None,
         fen: Optional[str] = None,
-        **kwargs
     ) -> ChessObservation:
         """
         Initialize a new chess game episode.
@@ -99,10 +103,7 @@ class ChessEnvironment:
         return self._get_observation()
     def step(
-        self,
-        action: ChessAction,
-        timeout_s: Optional[float] = None,
-        **kwargs
     ) -> Tuple[ChessObservation, float, bool]:
         """
         Execute a chess move and return the resulting state.
@@ -263,7 +264,9 @@ class ChessEnvironment:
         return reward, False
-    def _handle_illegal_move(self, error_msg: str) -> Tuple[ChessObservation, float, bool]:
         """Handle an illegal move attempt."""
         observation = self._get_observation(done=False, error=error_msg)
         return observation, self.reward_config.illegal_move, False

         self,
         reward_config: Optional[RewardConfig] = None,
         max_moves: int = 500,
+        agent_color: Optional[
+            bool
+        ] = None,  # None = alternate, True = White, False = Black
+        opponent: Optional[
+            str
+        ] = None,  # None = self-play, "moonfish" = moonfish engine, "random" = random
         opponent_depth: int = 2,  # Search depth for moonfish opponent
     ):
         """
         seed: Optional[int] = None,
         episode_id: Optional[str] = None,
         fen: Optional[str] = None,
+        **kwargs,
     ) -> ChessObservation:
         """
         Initialize a new chess game episode.
         return self._get_observation()
     def step(
+        self, action: ChessAction, timeout_s: Optional[float] = None, **kwargs
     ) -> Tuple[ChessObservation, float, bool]:
         """
         Execute a chess move and return the resulting state.
         return reward, False
+    def _handle_illegal_move(
+        self, error_msg: str
+    ) -> Tuple[ChessObservation, float, bool]:
         """Handle an illegal move attempt."""
         observation = self._get_observation(done=False, error=error_msg)
         return observation, self.reward_config.illegal_move, False