File size: 2,273 Bytes
3e1f9da
 
 
e5572a6
3e1f9da
 
 
 
 
 
 
 
 
 
b5e858e
3e1f9da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5e858e
3e1f9da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5e858e
3e1f9da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5e858e
3e1f9da
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""Data models for the Chess OpenEnv environment."""

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional


@dataclass
class ChessAction:
    """A single chess move submitted as an action.

    Attributes:
        move: The move written as a UCI string, for example "e2e4", or
            "e7e8q" when the move is a promotion.
    """

    # Sole (required) field; no default is provided.
    move: str


@dataclass
class ChessObservation:
    """Observable snapshot of the chess environment.

    Attributes:
        fen: The board position, in FEN notation.
        legal_moves: Legal moves, each in UCI format.
        is_check: True when the current player is in check.
        done: True once the episode has ended.
        reward: Reward value: 1.0 for a win, -1.0 for a loss, 0.0 for a
            draw, and None otherwise.
        result: Game result string when the game is over (e.g., "1-0",
            "0-1", "1/2-1/2").
        metadata: Additional information about the position.
    """

    # Required fields.
    fen: str
    legal_moves: List[str]

    # Optional fields with explicit defaults.
    is_check: bool = field(default=False)
    done: bool = field(default=False)
    reward: Optional[float] = field(default=None)
    result: Optional[str] = field(default=None)
    # A factory is used so each instance gets its own dict.
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class ChessState:
    """Episode-level metadata for the chess environment.

    Attributes:
        episode_id: Unique identifier of the current episode.
        step_count: How many moves (half-moves) have been played in the
            current episode.
        current_player: Either "white" or "black".
        fen: The current position, in FEN notation.
        move_history: Moves played so far, each in UCI format.
    """

    episode_id: str
    step_count: int
    current_player: str
    fen: str
    # A factory is used so each instance gets its own list.
    move_history: List[str] = field(default_factory=list)


@dataclass
class RewardConfig:
    """Reward-shaping settings for the chess environment.

    Attributes:
        win: Reward given for winning the game.
        loss: Reward given for losing the game.
        draw: Reward given for drawing the game.
        illegal_move: Penalty for attempting an illegal move.
        use_evaluation: Whether position evaluation is included in rewards.
        evaluation_scale: Scale factor for evaluation-based rewards.
    """

    # Terminal outcome rewards.
    win: float = field(default=1.0)
    loss: float = field(default=-1.0)
    draw: float = field(default=0.0)

    # Penalty for an illegal move attempt.
    illegal_move: float = field(default=-0.1)

    # Evaluation-based shaping settings.
    use_evaluation: bool = field(default=False)
    evaluation_scale: float = field(default=0.001)