Spaces:
Sleeping
Sleeping
Commit ·
3a5b76e
1
Parent(s): 8251fe9
v0.5.0: reduce game_over to -50, disable height breach penalty
Browse files
Reward signal was dominated by constant penalties (-500 game over + ~-350 height breach),
leaving only ~5% learnable signal. Now learnable components are ~80% of total reward.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
src/tetris_env/server/game_engine.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
Tetris Environment for OpenEnv.
|
| 3 |
Full game logic with combo scoring reward system.
|
| 4 |
"""
|
| 5 |
-
__version__ = "0.
|
| 6 |
|
| 7 |
import random
|
| 8 |
import copy
|
|
@@ -40,9 +40,9 @@ LINE_REWARDS = {
|
|
| 40 |
|
| 41 |
STEP_PENALTY = -0.1
|
| 42 |
HOLE_PENALTY_MULT = -5
|
| 43 |
-
GAME_OVER_PENALTY = -500
|
| 44 |
HEIGHT_BREACH_THRESHOLD = 4
|
| 45 |
-
HEIGHT_BREACH_PENALTY =
|
| 46 |
|
| 47 |
|
| 48 |
def rotate_cw(piece: list[list[int]]) -> list[list[int]]:
|
|
|
|
| 2 |
Tetris Environment for OpenEnv.
|
| 3 |
Full game logic with combo scoring reward system.
|
| 4 |
"""
|
| 5 |
+
__version__ = "0.5.0" # game_over -50, height breach OFF, LR 1e-4
|
| 6 |
|
| 7 |
import random
|
| 8 |
import copy
|
|
|
|
| 40 |
|
| 41 |
STEP_PENALTY = -0.1
|
| 42 |
HOLE_PENALTY_MULT = -5
|
| 43 |
+
GAME_OVER_PENALTY = -50
|
| 44 |
HEIGHT_BREACH_THRESHOLD = 4
|
| 45 |
+
HEIGHT_BREACH_PENALTY = 0 # disabled for initial training
|
| 46 |
|
| 47 |
|
| 48 |
def rotate_cw(piece: list[list[int]]) -> list[list[int]]:
|