thaihipster committed on
Commit
e5e20e8
·
verified ·
1 Parent(s): 462064f

Upload server/game_2048_environment.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. server/game_2048_environment.py +91 -0
server/game_2048_environment.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 2048 Game Environment Implementation.
3
+
4
+ An OpenEnv environment wrapping the classic 2048 sliding tile puzzle.
5
+ The agent chooses one of four directions (up/down/left/right) each step.
6
+ Reward is the score gained from tile merges on that step.
7
+ """
8
+
9
+ from typing import Any, Optional
10
+ from uuid import uuid4
11
+
12
+ # Support both in-repo and standalone imports
13
+ try:
14
+ from openenv.core.env_server.interfaces import Environment
15
+ from openenv.core.env_server.types import State
16
+
17
+ from ..models import Game2048Action, Game2048Observation, Game2048State
18
+ except ImportError:
19
+ from openenv_core.env_server.interfaces import Environment
20
+ from openenv_core.env_server.types import State
21
+
22
+ from models import Game2048Action, Game2048Observation, Game2048State
23
+
24
+ from .game_2048 import Game2048
25
+
26
+
27
class Game2048Environment(Environment[Game2048Action, Game2048Observation, Game2048State]):
    """
    OpenEnv environment for the 2048 game.

    The agent slides tiles in one of four directions. Equal tiles merge,
    doubling their value and adding to the score. A new tile (2 or 4)
    spawns after each valid move. The episode ends when no moves remain.

    All game rules are delegated to the wrapped ``Game2048`` instance; this
    class only tracks episode bookkeeping and packages observations.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self, seed: Optional[int] = None):
        """Create the underlying game and a fresh episode state.

        Args:
            seed: Forwarded unchanged to ``Game2048``.
        """
        self.game = Game2048(seed=seed)
        self._state = Game2048State(episode_id=str(uuid4()), step_count=0)
        self._sync_state()

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        **kwargs: Any,
    ) -> Game2048Observation:
        """Start a new episode and return its initial observation.

        Args:
            seed: Forwarded unchanged to ``Game2048.reset``.
            episode_id: Identifier for the new episode; a random UUID4 string
                is generated when this is ``None`` or empty.
            **kwargs: Accepted but not used by this implementation.

        Returns:
            The observation of the freshly reset game, with ``reward=0.0``.
        """
        self.game.reset(seed=seed)
        self._state = Game2048State(
            episode_id=episode_id or str(uuid4()),
            step_count=0,
        )
        self._sync_state()
        return self._build_observation(reward=0.0)

    def step(
        self,
        action: Game2048Action,
        timeout_s: Optional[float] = None,
        **kwargs: Any,
    ) -> Game2048Observation:
        """Apply one action to the game and return the resulting observation.

        Args:
            action: Action wrapper whose ``action`` attribute is passed to
                ``Game2048.step``.
            timeout_s: Accepted but not used by this implementation.
            **kwargs: Accepted but not used by this implementation.

        Returns:
            The observation after the move; ``reward`` is the first value
            returned by ``Game2048.step`` converted to ``float``.
        """
        self._state.step_count += 1
        # The second value returned by the game is discarded on purpose:
        # completion is read from ``self.game.done`` when the state is synced
        # and when the observation is built.
        merge_score, _ = self.game.step(action.action)
        self._sync_state()
        return self._build_observation(reward=float(merge_score))

    def _build_observation(self, reward: float) -> Game2048Observation:
        """Package the current game into an observation carrying ``reward``."""
        return Game2048Observation(
            # Copy each row so the observation does not alias the live board.
            board=[row[:] for row in self.game.board],
            score=self.game.score,
            legal_actions=self.game.legal_actions(),
            max_tile=self.game.max_tile(),
            board_text=self.game.render(),
            reward=reward,
            done=self.game.done,
            metadata={
                "won": self.game.won,
                "step": self._state.step_count,
            },
        )

    def _sync_state(self) -> None:
        """Copy done/won/score/max_tile from the game onto ``self._state``."""
        self._state.done = self.game.done
        self._state.won = self.game.won
        self._state.score = self.game.score
        self._state.max_tile = self.game.max_tile()

    @property
    def state(self) -> State:
        """Return the current episode state object."""
        return self._state