Spaces:
Sleeping
Sleeping
Upload models.py with huggingface_hub
Browse files
models.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pydantic models for OpenEnv Tetris environment.
|
| 3 |
+
Follows OpenEnv 0.2 spec: Action, Observation, StepResult.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from pydantic import BaseModel, Field
|
| 7 |
+
from typing import Optional
|
| 8 |
+
from enum import Enum
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ActionType(str, Enum):
    """Closed set of actions accepted by the Tetris environment.

    Members mix in ``str``, so each member compares equal to its value
    (``ActionType.LEFT == "left"``) and can be looked up from that value
    (``ActionType("left")``).
    """

    LEFT = "left"
    RIGHT = "right"
    ROTATE_CW = "rotate_cw"
    ROTATE_CCW = "rotate_ccw"
    DROP = "drop"
    DOWN = "down"
    NOOP = "noop"

    def __str__(self) -> str:
        """Return the member's value, e.g. ``"left"``.

        Without this override the text produced by ``str()`` and by
        ``format()``/f-strings for a ``str``-mixed enum depends on the
        interpreter version (Python 3.11 changed ``Enum.__format__`` to
        follow ``Enum.__str__``).  Returning the value keeps both forms
        identical on every version.
        """
        return self.value
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class TetrisAction(BaseModel):
    """Model wrapping the single action to perform."""

    # Required field; validated against the ActionType enum.
    action: ActionType = Field(
        ...,
        description=(
            "Action to perform. One of: left, right, rotate_cw, rotate_ccw, drop, down, noop"
        ),
    )
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class TetrisObservation(BaseModel):
    """Snapshot of the game state exposed to the agent.

    Every field is required.  The board and piece shapes are carried as
    text; the remaining fields are integer counters and positions.
    """

    # --- board and pieces (text) ---
    board: str = Field(
        ...,
        description="Text representation of the board (10x20 grid)",
    )
    current_piece: str = Field(
        ...,
        description="Name of current piece (I, O, T, S, Z, L, J)",
    )
    current_piece_shape: str = Field(
        ...,
        description="Text shape of current piece",
    )
    next_piece: str = Field(
        ...,
        description="Name of next piece",
    )
    next_piece_shape: str = Field(
        ...,
        description="Text shape of next piece",
    )

    # --- position of the current piece ---
    piece_x: int = Field(
        ...,
        description="Current piece X position",
    )
    piece_y: int = Field(
        ...,
        description="Current piece Y position",
    )

    # --- running totals ---
    score: int = Field(
        ...,
        description="Current score",
    )
    total_lines: int = Field(
        ...,
        description="Total lines cleared",
    )
    steps: int = Field(
        ...,
        description="Number of steps taken",
    )

    # --- board-quality metrics ---
    max_height: int = Field(
        ...,
        description="Height of tallest column",
    )
    holes: int = Field(
        ...,
        description="Number of holes in the board",
    )
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class StepResult(BaseModel):
    """Observation, reward and episode-termination flag for one step."""

    # Game state after the step (required, no default).
    observation: TetrisObservation
    reward: float = Field(
        ...,
        description="Reward for this step",
    )
    done: bool = Field(
        ...,
        description="Whether the episode is over",
    )
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class ResetResult(BaseModel):
    """Model holding a single required ``TetrisObservation``.

    NOTE(review): presumably the payload returned after an environment
    reset -- confirm against the code that constructs it.
    """

    observation: TetrisObservation
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class ResetRequest(BaseModel):
    """Model carrying an optional integer seed."""

    # Defaults to None, so the field may be omitted entirely.
    seed: Optional[int] = Field(
        default=None,
        description="Optional random seed for reproducibility",
    )
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class EnvInfo(BaseModel):
    """Static metadata describing the Tetris environment.

    Every field has a default, so ``EnvInfo()`` can be constructed with no
    arguments.
    """

    name: str = "tetris-env"
    description: str = "Tetris environment for LLM agent training with combo scoring"
    version: str = "0.1.0"

    # Derived from ActionType so the advertised list cannot drift from the
    # enum; this yields the same seven strings, in the same order, as the
    # previous hand-written literal.
    action_space: list[str] = [member.value for member in ActionType]

    observation_format: str = "text"
    board_size: str = "10x20"

    # Integer entries are fixed amounts; string entries are formulas written
    # out as text (they are not evaluated in this module).
    reward_structure: dict = {
        "1_line": 100,
        "2_lines": 300,
        "3_lines": 700,
        "4_lines_tetris": 1500,
        "step_penalty": -1,
        "height_penalty": "-2 * max_height",
        "hole_penalty": "-5 * holes",
        "game_over": -500,
    }
|