OutOfMystic Claude Opus 4.6 committed on
Commit
9dd7378
·
1 Parent(s): 5a39470

feat: decaying height breach penalty based on pieces placed

Browse files

The height breach penalty now decays by 5 per piece locked:
piece 0 = -50/level, piece 9 = -5/level, piece 10+ = 0.
This teaches the model to keep the board low early and relaxes the penalty later.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

src/tetris_env/server/game_engine.py CHANGED
@@ -86,6 +86,7 @@ class TetrisEnv:
86
  self.next_piece_name = ""
87
  self.next_piece = None
88
  self.max_penalized_height = HEIGHT_BREACH_THRESHOLD
 
89
  self._spawn_next()
90
  self._spawn_next()
91
 
@@ -253,6 +254,7 @@ class TetrisEnv:
253
  else:
254
  # Can't move down — lock piece
255
  self._lock_piece()
 
256
  lines = self._clear_lines()
257
  if lines > 0:
258
  reward += LINE_REWARDS.get(lines, lines * 400)
@@ -261,6 +263,7 @@ class TetrisEnv:
261
  else:
262
  # Drop action: lock immediately
263
  self._lock_piece()
 
264
  lines = self._clear_lines()
265
  if lines > 0:
266
  reward += LINE_REWARDS.get(lines, lines * 400)
@@ -273,10 +276,13 @@ class TetrisEnv:
273
  reward += HOLE_PENALTY_MULT * new_holes
274
 
275
  # One-time penalty for each height level breached above threshold
 
276
  current_height = self._max_height()
277
  if current_height > self.max_penalized_height:
278
- new_levels = current_height - self.max_penalized_height
279
- reward += HEIGHT_BREACH_PENALTY * new_levels
 
 
280
  self.max_penalized_height = current_height
281
 
282
  if self.done:
 
86
  self.next_piece_name = ""
87
  self.next_piece = None
88
  self.max_penalized_height = HEIGHT_BREACH_THRESHOLD
89
+ self.pieces_locked = 0
90
  self._spawn_next()
91
  self._spawn_next()
92
 
 
254
  else:
255
  # Can't move down — lock piece
256
  self._lock_piece()
257
+ self.pieces_locked += 1
258
  lines = self._clear_lines()
259
  if lines > 0:
260
  reward += LINE_REWARDS.get(lines, lines * 400)
 
263
  else:
264
  # Drop action: lock immediately
265
  self._lock_piece()
266
+ self.pieces_locked += 1
267
  lines = self._clear_lines()
268
  if lines > 0:
269
  reward += LINE_REWARDS.get(lines, lines * 400)
 
276
  reward += HOLE_PENALTY_MULT * new_holes
277
 
278
  # One-time penalty for each height level breached above threshold
279
+ # Decays by 5 per piece locked: piece 0 → -50, piece 9 → -5, piece 10+ → 0
280
  current_height = self._max_height()
281
  if current_height > self.max_penalized_height:
282
+ penalty_per_level = min(0, HEIGHT_BREACH_PENALTY + 5 * self.pieces_locked)
283
+ if penalty_per_level < 0:
284
+ new_levels = current_height - self.max_penalized_height
285
+ reward += penalty_per_level * new_levels
286
  self.max_penalized_height = current_height
287
 
288
  if self.done: