Spaces:
Sleeping
Sleeping
Commit ·
9dd7378
1
Parent(s): 5a39470
feat: decaying height breach penalty based on pieces placed
Browse files
Height breach penalty now decays by 5 per piece locked:
piece 0 = -50/level, piece 9 = -5/level, piece 10+ = 0.
Teaches model to keep board low early, relaxes later.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
src/tetris_env/server/game_engine.py
CHANGED
|
@@ -86,6 +86,7 @@ class TetrisEnv:
|
|
| 86 |
self.next_piece_name = ""
|
| 87 |
self.next_piece = None
|
| 88 |
self.max_penalized_height = HEIGHT_BREACH_THRESHOLD
|
|
|
|
| 89 |
self._spawn_next()
|
| 90 |
self._spawn_next()
|
| 91 |
|
|
@@ -253,6 +254,7 @@ class TetrisEnv:
|
|
| 253 |
else:
|
| 254 |
# Can't move down — lock piece
|
| 255 |
self._lock_piece()
|
|
|
|
| 256 |
lines = self._clear_lines()
|
| 257 |
if lines > 0:
|
| 258 |
reward += LINE_REWARDS.get(lines, lines * 400)
|
|
@@ -261,6 +263,7 @@ class TetrisEnv:
|
|
| 261 |
else:
|
| 262 |
# Drop action: lock immediately
|
| 263 |
self._lock_piece()
|
|
|
|
| 264 |
lines = self._clear_lines()
|
| 265 |
if lines > 0:
|
| 266 |
reward += LINE_REWARDS.get(lines, lines * 400)
|
|
@@ -273,10 +276,13 @@ class TetrisEnv:
|
|
| 273 |
reward += HOLE_PENALTY_MULT * new_holes
|
| 274 |
|
| 275 |
# One-time penalty for each height level breached above threshold
|
|
|
|
| 276 |
current_height = self._max_height()
|
| 277 |
if current_height > self.max_penalized_height:
|
| 278 |
-
|
| 279 |
-
|
|
|
|
|
|
|
| 280 |
self.max_penalized_height = current_height
|
| 281 |
|
| 282 |
if self.done:
|
|
|
|
| 86 |
self.next_piece_name = ""
|
| 87 |
self.next_piece = None
|
| 88 |
self.max_penalized_height = HEIGHT_BREACH_THRESHOLD
|
| 89 |
+
self.pieces_locked = 0
|
| 90 |
self._spawn_next()
|
| 91 |
self._spawn_next()
|
| 92 |
|
|
|
|
| 254 |
else:
|
| 255 |
# Can't move down — lock piece
|
| 256 |
self._lock_piece()
|
| 257 |
+
self.pieces_locked += 1
|
| 258 |
lines = self._clear_lines()
|
| 259 |
if lines > 0:
|
| 260 |
reward += LINE_REWARDS.get(lines, lines * 400)
|
|
|
|
| 263 |
else:
|
| 264 |
# Drop action: lock immediately
|
| 265 |
self._lock_piece()
|
| 266 |
+
self.pieces_locked += 1
|
| 267 |
lines = self._clear_lines()
|
| 268 |
if lines > 0:
|
| 269 |
reward += LINE_REWARDS.get(lines, lines * 400)
|
|
|
|
| 276 |
reward += HOLE_PENALTY_MULT * new_holes
|
| 277 |
|
| 278 |
# One-time penalty for each height level breached above threshold
|
| 279 |
+
# Decays by 5 per piece locked: piece 0 → -50, piece 9 → -5, piece 10+ → 0
|
| 280 |
current_height = self._max_height()
|
| 281 |
if current_height > self.max_penalized_height:
|
| 282 |
+
penalty_per_level = min(0, HEIGHT_BREACH_PENALTY + 5 * self.pieces_locked)
|
| 283 |
+
if penalty_per_level < 0:
|
| 284 |
+
new_levels = current_height - self.max_penalized_height
|
| 285 |
+
reward += penalty_per_level * new_levels
|
| 286 |
self.max_penalized_height = current_height
|
| 287 |
|
| 288 |
if self.done:
|