OutOfMystic Claude Opus 4.6 committed on
Commit
49913c2
·
1 Parent(s): cbba880

fix: remove height penalty, hole penalty only for new holes

Browse files

Height penalty removed entirely. Hole penalty now applies only to holes
newly created by a step, instead of being re-charged for every existing hole.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

src/tetris_env/server/game_engine.py CHANGED
@@ -38,7 +38,6 @@ LINE_REWARDS = {
38
  }
39
 
40
  STEP_PENALTY = -1
41
- HEIGHT_PENALTY_MULT = -2
42
  HOLE_PENALTY_MULT = -5
43
  GAME_OVER_PENALTY = -500
44
 
@@ -218,6 +217,8 @@ class TetrisEnv:
218
  self.steps += 1
219
  reward = STEP_PENALTY # base penalty per step
220
 
 
 
221
  action = action.strip().lower()
222
 
223
  if action == "left":
@@ -263,9 +264,10 @@ class TetrisEnv:
263
  self.score += LINE_REWARDS.get(lines, lines * 400)
264
  self._spawn_next()
265
 
266
- # Penalties for board state
267
- reward += HEIGHT_PENALTY_MULT * self._max_height()
268
- reward += HOLE_PENALTY_MULT * self._count_holes()
 
269
 
270
  if self.done:
271
  reward += GAME_OVER_PENALTY
 
38
  }
39
 
40
  STEP_PENALTY = -1
 
41
  HOLE_PENALTY_MULT = -5
42
  GAME_OVER_PENALTY = -500
43
 
 
217
  self.steps += 1
218
  reward = STEP_PENALTY # base penalty per step
219
 
220
+ holes_before = self._count_holes()
221
+
222
  action = action.strip().lower()
223
 
224
  if action == "left":
 
264
  self.score += LINE_REWARDS.get(lines, lines * 400)
265
  self._spawn_next()
266
 
267
+ # Penalty only for NEW holes created by this step
268
+ new_holes = self._count_holes() - holes_before
269
+ if new_holes > 0:
270
+ reward += HOLE_PENALTY_MULT * new_holes
271
 
272
  if self.done:
273
  reward += GAME_OVER_PENALTY