Spaces:
Sleeping
Sleeping
File size: 2,273 Bytes
3e1f9da e5572a6 3e1f9da b5e858e 3e1f9da b5e858e 3e1f9da b5e858e 3e1f9da b5e858e 3e1f9da |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
"""Data models for the Chess OpenEnv environment."""
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class ChessAction:
    """A single chess move submitted to the environment.

    Attributes:
        move: Move string in UCI format (e.g., "e2e4", or "e7e8q" for a
            promotion).
    """

    move: str
@dataclass
class ChessObservation:
    """Observable state of the chess environment.

    Attributes:
        fen: Board position in FEN notation.
        legal_moves: Legal moves in UCI format.
        is_check: Whether the current player is in check.
        done: Whether the episode has ended.
        reward: Reward value (1.0 for win, -1.0 for loss, 0.0 for draw,
            None otherwise).
        result: Game result string if the game is over (e.g., "1-0",
            "0-1", "1/2-1/2").
        metadata: Additional information about the position.
    """

    fen: str
    legal_moves: List[str]
    is_check: bool = False
    done: bool = False
    reward: Optional[float] = None
    result: Optional[str] = None
    # default_factory gives every observation its own dict instead of one
    # shared mutable default.
    metadata: Dict[str, Any] = field(default_factory=dict)
@dataclass
class ChessState:
    """Episode-level bookkeeping for the chess environment.

    Attributes:
        episode_id: Unique identifier for the current episode.
        step_count: Number of moves (half-moves) played in the current
            episode.
        current_player: "white" or "black".
        fen: Current position in FEN notation.
        move_history: Moves played so far, in UCI format.
    """

    episode_id: str
    step_count: int
    current_player: str
    fen: str
    # default_factory gives every state its own list instead of one shared
    # mutable default.
    move_history: List[str] = field(default_factory=list)
@dataclass
class RewardConfig:
    """Reward-shaping configuration for the chess environment.

    Attributes:
        win: Reward for winning the game.
        loss: Reward for losing the game.
        draw: Reward for drawing the game.
        illegal_move: Penalty for attempting an illegal move.
        use_evaluation: Whether to include position evaluation in rewards.
        evaluation_scale: Scale factor for evaluation-based rewards.
    """

    win: float = 1.0
    loss: float = -1.0
    draw: float = 0.0
    illegal_move: float = -0.1
    use_evaluation: bool = False
    evaluation_scale: float = 0.001
|