VortexedSquirrel committed on
Commit
2ccda46
·
verified ·
1 Parent(s): 3fad79b

Upload models.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. models.py +73 -0
models.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pydantic models for OpenEnv Tetris environment.
3
+ Follows OpenEnv 0.2 spec: Action, Observation, StepResult.
4
+ """
5
+
6
+ from pydantic import BaseModel, Field
7
+ from typing import Optional
8
+ from enum import Enum
9
+
10
+
11
class ActionType(str, Enum):
    """Enumeration of the action names accepted in ``TetrisAction.action``.

    The ``str`` mix-in makes every member compare equal to its raw string
    value (``ActionType.LEFT == "left"``), and a member can be looked up
    from that string with ``ActionType("left")``.
    """

    LEFT = "left"
    RIGHT = "right"
    ROTATE_CW = "rotate_cw"
    ROTATE_CCW = "rotate_ccw"
    DROP = "drop"
    DOWN = "down"
    NOOP = "noop"
19
+
20
+
21
class TetrisAction(BaseModel):
    """Model wrapping the single action a client submits.

    ``action`` is required (declared with ``...``) and must be one of the
    ``ActionType`` members.
    """

    action: ActionType = Field(
        ...,
        description="Action to perform. One of: left, right, rotate_cw, rotate_ccw, drop, down, noop",
    )
26
+
27
+
28
class TetrisObservation(BaseModel):
    """Snapshot of the game state reported to the client.

    Every field is required (declared with ``...``). The board and the
    piece shapes are carried as text; position and statistics are integers.
    """

    # Board and pieces, as text.
    board: str = Field(..., description="Text representation of the board (10x20 grid)")
    current_piece: str = Field(..., description="Name of current piece (I, O, T, S, Z, L, J)")
    current_piece_shape: str = Field(..., description="Text shape of current piece")
    next_piece: str = Field(..., description="Name of next piece")
    next_piece_shape: str = Field(..., description="Text shape of next piece")

    # Position of the current piece.
    piece_x: int = Field(..., description="Current piece X position")
    piece_y: int = Field(..., description="Current piece Y position")

    # Running statistics.
    score: int = Field(..., description="Current score")
    total_lines: int = Field(..., description="Total lines cleared")
    steps: int = Field(..., description="Number of steps taken")
    max_height: int = Field(..., description="Height of tallest column")
    holes: int = Field(..., description="Number of holes in the board")
41
+
42
+
43
class StepResult(BaseModel):
    """Result model for one step: observation, reward and done flag.

    All three fields are required.
    """

    observation: TetrisObservation
    reward: float = Field(..., description="Reward for this step")
    done: bool = Field(..., description="Whether the episode is over")
47
+
48
+
49
class ResetResult(BaseModel):
    """Result model for a reset; carries only the (required) observation."""

    observation: TetrisObservation
51
+
52
+
53
class ResetRequest(BaseModel):
    """Request model for a reset.

    ``seed`` defaults to ``None``, so the model can be built with no
    arguments.
    """

    seed: Optional[int] = Field(None, description="Optional random seed for reproducibility")
55
+
56
+
57
class EnvInfo(BaseModel):
    """Descriptive metadata about the environment.

    Every field has a default, so ``EnvInfo()`` can be constructed without
    arguments.
    """

    name: str = "tetris-env"
    description: str = "Tetris environment for LLM agent training with combo scoring"
    version: str = "0.1.0"
    # NOTE(review): repeats the ActionType values by hand; keep the two in sync.
    action_space: list[str] = ["left", "right", "rotate_cw", "rotate_ccw", "drop", "down", "noop"]
    observation_format: str = "text"
    board_size: str = "10x20"
    # Values mix plain integers with formula strings, hence the untyped dict.
    reward_structure: dict = {
        "1_line": 100,
        "2_lines": 300,
        "3_lines": 700,
        "4_lines_tetris": 1500,
        "step_penalty": -1,
        "height_penalty": "-2 * max_height",
        "hole_penalty": "-5 * holes",
        "game_over": -500,
    }